Switch numeric suffix parsing to use the new system.

huonw · huonw · commit 606a309d4aeb · 2014-11-20T00:02:42.000+11:00
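This moves errors and all handling of numeric suffixes into the parser
rather than the lexer: the lexer now emits the number and any trailing
suffix as the two parts of `token::Literal`, and `integer_lit` /
`float_lit` validate the suffix and report illegal ones.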
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
@@ -672,16 +672,6 @@ impl<'a> StringReader<'a> {
                 '0'...'9' | '_' | '.' => {
                     num_digits = self.scan_digits(10) + 1;
                 }
-                'u' | 'i' => {
-                    self.scan_int_suffix();
-                    return token::Integer(self.name_from(start_bpos));
-                },
-                'f' => {
-                    let last_pos = self.last_pos;
-                    self.scan_float_suffix();
-                    self.check_float_base(start_bpos, last_pos, base);
-                    return token::Float(self.name_from(start_bpos));
-                }
                 _ => {
                     // just a 0
                     return token::Integer(self.name_from(start_bpos));
@@ -695,8 +685,6 @@ impl<'a> StringReader<'a> {
 
         if num_digits == 0 {
             self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
-            // eat any suffix
-            self.scan_int_suffix();
             return token::Integer(token::intern("0"));
         }
 
@@ -711,28 +699,19 @@ impl<'a> StringReader<'a> {
             if self.curr.unwrap_or('\0').is_digit_radix(10) {
                 self.scan_digits(10);
                 self.scan_float_exponent();
-                self.scan_float_suffix();
             }
             let last_pos = self.last_pos;
             self.check_float_base(start_bpos, last_pos, base);
             return token::Float(self.name_from(start_bpos));
-        } else if self.curr_is('f') {
-            // or it might be an integer literal suffixed as a float
-            self.scan_float_suffix();
-            let last_pos = self.last_pos;
-            self.check_float_base(start_bpos, last_pos, base);
-            return token::Float(self.name_from(start_bpos));
         } else {
             // it might be a float if it has an exponent
             if self.curr_is('e') || self.curr_is('E') {
                 self.scan_float_exponent();
-                self.scan_float_suffix();
                 let last_pos = self.last_pos;
                 self.check_float_base(start_bpos, last_pos, base);
                 return token::Float(self.name_from(start_bpos));
             }
             // but we certainly have an integer!
-            self.scan_int_suffix();
             return token::Integer(self.name_from(start_bpos));
         }
     }
@@ -869,55 +848,6 @@ impl<'a> StringReader<'a> {
         true
     }
 
-    /// Scan over an int literal suffix.
-    fn scan_int_suffix(&mut self) {
-        match self.curr {
-            Some('i') | Some('u') => {
-                self.bump();
-
-                if self.curr_is('8') {
-                    self.bump();
-                } else if self.curr_is('1') {
-                    if !self.nextch_is('6') {
-                        self.err_span_(self.last_pos, self.pos,
-                                      "illegal int suffix");
-                    } else {
-                        self.bump(); self.bump();
-                    }
-                } else if self.curr_is('3') {
-                    if !self.nextch_is('2') {
-                        self.err_span_(self.last_pos, self.pos,
-                                      "illegal int suffix");
-                    } else {
-                        self.bump(); self.bump();
-                    }
-                } else if self.curr_is('6') {
-                    if !self.nextch_is('4') {
-                        self.err_span_(self.last_pos, self.pos,
-                                      "illegal int suffix");
-                    } else {
-                        self.bump(); self.bump();
-                    }
-                }
-            },
-            _ => { }
-        }
-    }
-
-    /// Scan over a float literal suffix
-    fn scan_float_suffix(&mut self) {
-        if self.curr_is('f') {
-            if (self.nextch_is('3') && self.nextnextch_is('2'))
-            || (self.nextch_is('6') && self.nextnextch_is('4')) {
-                self.bump();
-                self.bump();
-                self.bump();
-            } else {
-                self.err_span_(self.last_pos, self.pos, "illegal float suffix");
-            }
-        }
-    }
-
     /// Scan over a float exponent.
     fn scan_float_exponent(&mut self) {
         if self.curr_is('e') || self.curr_is('E') {
@@ -988,6 +918,7 @@ impl<'a> StringReader<'a> {
         if is_dec_digit(c) {
             let num = self.scan_number(c.unwrap());
             let suffix = self.scan_optional_raw_name();
+            debug!("next_token_inner: scanned number {}, {}", num, suffix);
             return token::Literal(num, suffix)
         }
 
@@ -1609,6 +1540,9 @@ mod test {
         test!("1.0", Float, "1.0");
         test!("1.0e10", Float, "1.0e10");
 
+        assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok,
+                   token::Literal(token::Integer(token::intern("2")),
+                                  Some(token::intern("u"))));
         assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok,
                    token::Literal(token::StrRaw(token::intern("raw"), 3),
                                   Some(token::intern("suffix"))));
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
@@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String {
     res
 }
 
-pub fn float_lit(s: &str) -> ast::Lit_ {
-    debug!("float_lit: {}", s);
-    // FIXME #2252: bounds checking float literals is defered until trans
-    let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
-    let s = s2.as_slice();
-
-    let mut ty = None;
-
-    if s.ends_with("f32") {
-        ty = Some(ast::TyF32);
-    } else if s.ends_with("f64") {
-        ty = Some(ast::TyF64);
-    }
+// check if `s` looks like i32 or u1234 etc.
+fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
+    s.len() > 1 &&
+        first_chars.contains(&s.char_at(0)) &&
+        s.slice_from(1).chars().all(|c| '0' <= c && c <= '9')
+}
 
+fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>,
+                      sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+    debug!("filtered_float_lit: {}, {}", data, suffix);
+    match suffix {
+        Some("f32") => ast::LitFloat(data, ast::TyF32),
+        Some("f64") => ast::LitFloat(data, ast::TyF64),
+        Some(suf) => {
+            if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
+                // if it looks like a width, let's try to be helpful.
+                sd.span_err(sp, &*format!("illegal width `{}` for float literal, \
+                                          valid widths are 32 and 64", suf.slice_from(1)));
+            } else {
+                sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \
+                                          valid suffixes are `f32` and `f64`", suf));
+            }
 
-    match ty {
-        Some(t) => {
-            ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
-        },
-        None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
+            ast::LitFloatUnsuffixed(data)
+        }
+        None => ast::LitFloatUnsuffixed(data)
     }
 }
+pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+    debug!("float_lit: {}, {}", s, suffix);
+    // FIXME #2252: bounds checking float literals is deferred until trans
+    let s = s.chars().filter(|&c| c != '_').collect::<String>();
+    let data = token::intern_and_get_ident(&*s);
+    filtered_float_lit(data, suffix, sd, sp)
+}
 
 /// Parse a string representing a byte literal into its final form. Similar to `char_lit`
 pub fn byte_lit(lit: &str) -> (u8, uint) {
@@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
     Rc::new(res)
 }
 
-pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
     // s can only be ascii, byte indexing is fine
 
     let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
     let mut s = s2.as_slice();
 
-    debug!("parse_integer_lit: {}", s);
-
-    if s.len() == 1 {
-        let n = (s.char_at(0)).to_digit(10).unwrap();
-        return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n)));
-    }
+    debug!("integer_lit: {}, {}", s, suffix);
 
     let mut base = 10;
     let orig = s;
     let mut ty = ast::UnsuffixedIntLit(ast::Plus);
 
-    if s.char_at(0) == '0' {
+    if s.char_at(0) == '0' && s.len() > 1 {
         match s.char_at(1) {
             'x' => base = 16,
             'o' => base = 8,
@@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
         }
     }
 
+    // 1f64 and 2f32 etc. are valid float literals.
+    match suffix {
+        Some(suf) if looks_like_width_suffix(&['f'], suf) => {
+            match base {
+                16u => sd.span_err(sp, "hexadecimal float literal is not supported"),
+                8u => sd.span_err(sp, "octal float literal is not supported"),
+                2u => sd.span_err(sp, "binary float literal is not supported"),
+                _ => ()
+            }
+            let ident = token::intern_and_get_ident(&*s);
+            return filtered_float_lit(ident, suffix, sd, sp)
+        }
+        _ => {}
+    }
+
     if base != 10 {
         s = s.slice_from(2);
     }
 
-    let last = s.len() - 1;
-    match s.char_at(last) {
-        'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus),
-        'u' => ty = ast::UnsignedIntLit(ast::TyU),
-        '8' => {
-            if s.len() > 2 {
-                match s.char_at(last - 1) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU8),
-                    _ => { }
-                }
-            }
-        },
-        '6' => {
-            if s.len() > 3 && s.char_at(last - 1) == '1' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU16),
-                    _ => { }
-                }
-            }
-        },
-        '2' => {
-            if s.len() > 3 && s.char_at(last - 1) == '3' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU32),
-                    _ => { }
-                }
-            }
-        },
-        '4' => {
-            if s.len() > 3 && s.char_at(last - 1) == '6' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU64),
-                    _ => { }
+    if let Some(suf) = suffix {
+        if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")}
+        ty = match suf {
+            "i"   => ast::SignedIntLit(ast::TyI, ast::Plus),
+            "i8"  => ast::SignedIntLit(ast::TyI8, ast::Plus),
+            "i16" => ast::SignedIntLit(ast::TyI16, ast::Plus),
+            "i32" => ast::SignedIntLit(ast::TyI32, ast::Plus),
+            "i64" => ast::SignedIntLit(ast::TyI64, ast::Plus),
+            "u"   => ast::UnsignedIntLit(ast::TyU),
+            "u8"  => ast::UnsignedIntLit(ast::TyU8),
+            "u16" => ast::UnsignedIntLit(ast::TyU16),
+            "u32" => ast::UnsignedIntLit(ast::TyU32),
+            "u64" => ast::UnsignedIntLit(ast::TyU64),
+            _ => {
+                // i<digits> and u<digits> look like widths, so let's
+                // give an error message along those lines
+                if looks_like_width_suffix(&['i', 'u'], suf) {
+                    sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \
+                                              valid widths are 8, 16, 32 and 64",
+                                              suf.slice_from(1)));
+                } else {
+                    sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf));
                 }
+
+                ty
             }
-        },
-        _ => { }
+        }
     }
 
-    debug!("The suffix is {}, base {}, the new string is {}, the original \
-           string was {}", ty, base, s, orig);
-
-    s = s.slice_to(s.len() - ty.suffix_len());
+    debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \
+           string was {}, the original suffix was {}", ty, base, s, orig, suffix);
 
     let res: u64 = match ::std::num::from_str_radix(s, base) {
         Some(r) => r,
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
@@ -652,9 +652,9 @@ impl<'a> Parser<'a> {
             Some(suf) => {
                 let text = suf.as_str();
                 if text.is_empty() {
-                    self.span_bug(sp, "found empty non-None literal suffix")
+                    self.span_bug(sp, "found empty literal suffix in Some")
                 }
-                self.span_err(sp, &*format!("a {} with a suffix is illegal", kind));
+                self.span_err(sp, &*format!("{} with a suffix is illegal", kind));
             }
         }
     }
@@ -1661,10 +1661,23 @@ impl<'a> Parser<'a> {
                 let (suffix_illegal, out) = match lit {
                     token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())),
                     token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())),
-                    token::Integer(s) => (false, parse::integer_lit(s.as_str(),
-                                                            &self.sess.span_diagnostic,
-                                                            self.last_span)),
-                    token::Float(s) => (false, parse::float_lit(s.as_str())),
+
+                    // there are some valid suffixes for integer and
+                    // float literals, so all the handling is done
+                    // internally.
+                    token::Integer(s) => {
+                        (false, parse::integer_lit(s.as_str(),
+                                                   suf.as_ref().map(|s| s.as_str()),
+                                                   &self.sess.span_diagnostic,
+                                                   self.last_span))
+                    }
+                    token::Float(s) => {
+                        (false, parse::float_lit(s.as_str(),
+                                                 suf.as_ref().map(|s| s.as_str()),
+                                                  &self.sess.span_diagnostic,
+                                                 self.last_span))
+                    }
+
                     token::Str_(s) => {
                         (true,
                          LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
diff --git a/src/test/compile-fail/bad-lit-suffixes.rs b/src/test/compile-fail/bad-lit-suffixes.rs
@@ -29,8 +29,13 @@ fn main() {
     'a'suffix; //~ ERROR char literal with a suffix is illegal
     b'a'suffix; //~ ERROR byte literal with a suffix is illegal
 
-    1234suffix;
-    0b101suffix;
-    1.0suffix;
-    1.0e10suffix;
+    1234u1024; //~ ERROR illegal width `1024` for integer literal
+    1234i1024; //~ ERROR illegal width `1024` for integer literal
+    1234f1024; //~ ERROR illegal width `1024` for float literal
+    1234.5f1024; //~ ERROR illegal width `1024` for float literal
+
+    1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal
+    0b101suffix; //~ ERROR illegal suffix `suffix` for numeric literal
+    1.0suffix; //~ ERROR illegal suffix `suffix` for numeric literal
+    1.0e10suffix; //~ ERROR illegal suffix `suffix` for numeric literal
 }