From 766b91d88b19aa0c2030e6d96c3fd8f4a7255891 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Tue, 17 Aug 2010 14:13:58 -0700 Subject: [PATCH] Add support for a reserved-words list and reserve the various not-supported-but-plausible IEEE datatypes. --- doc/rust.texi | 38 ++++++++++++++++++- src/boot/fe/lexer.mll | 87 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/doc/rust.texi b/doc/rust.texi index 63181f8846a..fcfb6499b50 100644 --- a/doc/rust.texi +++ b/doc/rust.texi @@ -599,6 +599,7 @@ Unicode characters. * Ref.Lex.Ignore:: Ignored characters. * Ref.Lex.Ident:: Identifier tokens. * Ref.Lex.Key:: Keyword tokens. +* Ref.Lex.Res:: Reserved tokens. * Ref.Lex.Num:: Numeric tokens. * Ref.Lex.Text:: String and character tokens. * Ref.Lex.Syntax:: Syntactic extension tokens. @@ -636,7 +637,7 @@ token or a syntactic extension token. Multi-line comments may be nested. Identifiers follow the pattern of C identifiers: they begin with a @emph{letter} or @emph{underscore}, and continue with any combination of @emph{letters}, @emph{decimal digits} and underscores, and must not be equal -to any keyword. @xref{Ref.Lex.Key}. +to any keyword or reserved token. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}. A @emph{letter} is a Unicode character in the ranges U+0061-U+007A and U+0041-U+005A (@code{'a'}-@code{'z'} and @code{'A'}-@code{'Z'}). @@ -728,6 +729,35 @@ The keywords are: @tab @code{be} @end multitable +@node Ref.Lex.Res +@subsection Ref.Lex.Res +@c * Ref.Lex.Res:: Reserved tokens. + +The reserved tokens are: +@cindex Reserved + +@sp 2 + +@multitable @columnfractions .15 .15 .15 .15 .15 +@item @code{f16} +@tab @code{f80} +@tab @code{f128} +@item @code{m32} +@tab @code{m64} +@tab @code{m128} +@tab @code{dec} +@end multitable + +@sp 2 + +At present these tokens have no defined meaning in the Rust language. 
+ +These tokens may correspond, in some current or future implementation, +to additional built-in types for decimal floating-point, extended +binary and interchange floating-point formats, as defined in the IEEE +754-1985 and IEEE 754-2008 specifications. + + @node Ref.Lex.Num @subsection Ref.Lex.Num @c * Ref.Lex.Num:: Numeric tokens. @@ -785,6 +815,10 @@ only two floating-point suffixes: @code{f32} and @code{f64}. Each of these gives the floating point literal the associated type, rather than @code{float}. +A set of suffixes is also reserved to accommodate literal support for +types corresponding to reserved tokens. The reserved suffixes are @code{f16}, +@code{f80}, @code{f128}, @code{m}, @code{m32}, @code{m64} and @code{m128}. + @sp 1 A @dfn{hex digit} is either a @emph{decimal digit} or else a character in the ranges U+0061-U+0066 and U+0041-U+0046 (@code{'a'}-@code{'f'}, @@ -2024,7 +2058,7 @@ The signed two's complement word types @code{i8}, @code{i16}, @code{i32} and @end ifhtml respectively. @item -The IEEE 754 single-precision and double-precision floating-point types: +The IEEE 754-2008 @code{binary32} and @code{binary64} floating-point types: @code{f32} and @code{f64}, respectively. @end itemize diff --git a/src/boot/fe/lexer.mll b/src/boot/fe/lexer.mll index ed548b1e78c..af8eab6a7c6 100644 --- a/src/boot/fe/lexer.mll +++ b/src/boot/fe/lexer.mll @@ -27,8 +27,12 @@ <- (bump_line lexbuf.Lexing.lex_curr_p) ;; - let mach_suf_table = Hashtbl.create 0 + let mach_suf_table = Hashtbl.create 10 ;; + + let reserved_suf_table = Hashtbl.create 10 + ;; + let _ = List.iter (fun (suf, ty) -> Common.htab_put mach_suf_table suf ty) [ ("u8", Common.TY_u8); @@ -43,8 +47,24 @@ ("f64", Common.TY_f64); ] ;; + let _ = + List.iter (fun suf -> Common.htab_put reserved_suf_table suf ()) + [ "f16"; (* IEEE 754-2008 'binary16' interchange format. 
*) + "f80"; (* IEEE 754-1985 'extended' *) + "f128"; (* IEEE 754-2008 'binary128' *) + "m32"; (* IEEE 754-2008 'decimal32' *) + "m64"; (* IEEE 754-2008 'decimal64' *) + "m128"; (* IEEE 754-2008 'decimal128' *) + "m"; (* One of m32, m64, m128. *) + ] + ;; + let keyword_table = Hashtbl.create 100 ;; + + let reserved_table = Hashtbl.create 10 + ;; + let _ = List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok) [ ("mod", MOD); @@ -141,6 +161,19 @@ ("f64", MACH TY_f64) ] ;; + + let _ = + List.iter (fun kwd -> Common.htab_put reserved_table kwd ()) + [ "f16"; (* IEEE 754-2008 'binary16' interchange format. *) + "f80"; (* IEEE 754-1985 'extended' *) + "f128"; (* IEEE 754-2008 'binary128' *) + "m32"; (* IEEE 754-2008 'decimal32' *) + "m64"; (* IEEE 754-2008 'decimal64' *) + "m128"; (* IEEE 754-2008 'decimal128' *) + "dec"; (* One of m32, m64, m128. *) + ]; + ;; + } let hexdig = ['0'-'9' 'a'-'f' 'A'-'F'] @@ -153,6 +186,7 @@ let flo = (dec '.' dec (exp?)) | (dec exp) let mach_float_suf = "f32"|"f64" let mach_int_suf = ['u''i']('8'|"16"|"32"|"64") +let flo_suf = ['m''f']("16"|"32"|"64"|"80"|"128") let ws = [ ' ' '\t' '\r' ] @@ -218,26 +252,39 @@ rule token = parse | ']' { RBRACKET } | id as i - { try - Hashtbl.find keyword_table i - with - Not_found -> IDENT (i) } + { + match Common.htab_search keyword_table i with + Some tok -> tok + | None -> + if Hashtbl.mem reserved_table i + then fail lexbuf "reserved keyword" + else IDENT (i) + } | (bin|hex|dec) as n { LIT_INT (Int64.of_string n) } | ((bin|hex|dec) as n) 'u' { LIT_UINT (Int64.of_string n) } | ((bin|hex|dec) as n) - (mach_int_suf as s) { try - let tm = - Hashtbl.find mach_suf_table s - in - LIT_MACH_INT - (tm, Int64.of_string n) - with - Not_found -> - fail lexbuf - "bad mach-int suffix" } + (mach_int_suf as s) + { + match Common.htab_search mach_suf_table s with + Some tm -> LIT_MACH_INT (tm, Int64.of_string n) + | None -> + if Hashtbl.mem reserved_suf_table s + then fail lexbuf "reserved mach-int suffix" 
+ else fail lexbuf "bad mach-int suffix" + } | flo as n { LIT_FLOAT (float_of_string n) } +| flo 'm' { fail lexbuf "reserved mach-float suffix" } +| (flo as n) (flo_suf as s) + { + match Common.htab_search mach_suf_table s with + Some tm -> LIT_MACH_FLOAT (tm, float_of_string n) + | None -> + if Hashtbl.mem reserved_suf_table s + then fail lexbuf "reserved mach-float suffix" + else fail lexbuf "bad mach-float suffix" + } | '\'' { char lexbuf } | '"' { let buf = Buffer.create 32 in @@ -411,3 +458,13 @@ and comment depth = parse comment depth lexbuf } | _ { comment depth lexbuf } + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *)