Skip to content

Commit 606a309

Browse files
committed
Switch numeric suffix parsing to use the new system.
This moves errors and all handling of numeric suffixes into the parser rather than the lexer.
1 parent 6679595 commit 606a309

File tree

4 files changed

+108
-149
lines changed

4 files changed

+108
-149
lines changed

src/libsyntax/parse/lexer/mod.rs

+4-70
Original file line numberDiff line numberDiff line change
@@ -672,16 +672,6 @@ impl<'a> StringReader<'a> {
672672
'0'...'9' | '_' | '.' => {
673673
num_digits = self.scan_digits(10) + 1;
674674
}
675-
'u' | 'i' => {
676-
self.scan_int_suffix();
677-
return token::Integer(self.name_from(start_bpos));
678-
},
679-
'f' => {
680-
let last_pos = self.last_pos;
681-
self.scan_float_suffix();
682-
self.check_float_base(start_bpos, last_pos, base);
683-
return token::Float(self.name_from(start_bpos));
684-
}
685675
_ => {
686676
// just a 0
687677
return token::Integer(self.name_from(start_bpos));
@@ -695,8 +685,6 @@ impl<'a> StringReader<'a> {
695685

696686
if num_digits == 0 {
697687
self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
698-
// eat any suffix
699-
self.scan_int_suffix();
700688
return token::Integer(token::intern("0"));
701689
}
702690

@@ -711,28 +699,19 @@ impl<'a> StringReader<'a> {
711699
if self.curr.unwrap_or('\0').is_digit_radix(10) {
712700
self.scan_digits(10);
713701
self.scan_float_exponent();
714-
self.scan_float_suffix();
715702
}
716703
let last_pos = self.last_pos;
717704
self.check_float_base(start_bpos, last_pos, base);
718705
return token::Float(self.name_from(start_bpos));
719-
} else if self.curr_is('f') {
720-
// or it might be an integer literal suffixed as a float
721-
self.scan_float_suffix();
722-
let last_pos = self.last_pos;
723-
self.check_float_base(start_bpos, last_pos, base);
724-
return token::Float(self.name_from(start_bpos));
725706
} else {
726707
// it might be a float if it has an exponent
727708
if self.curr_is('e') || self.curr_is('E') {
728709
self.scan_float_exponent();
729-
self.scan_float_suffix();
730710
let last_pos = self.last_pos;
731711
self.check_float_base(start_bpos, last_pos, base);
732712
return token::Float(self.name_from(start_bpos));
733713
}
734714
// but we certainly have an integer!
735-
self.scan_int_suffix();
736715
return token::Integer(self.name_from(start_bpos));
737716
}
738717
}
@@ -869,55 +848,6 @@ impl<'a> StringReader<'a> {
869848
true
870849
}
871850

872-
/// Scan over an int literal suffix.
873-
fn scan_int_suffix(&mut self) {
874-
match self.curr {
875-
Some('i') | Some('u') => {
876-
self.bump();
877-
878-
if self.curr_is('8') {
879-
self.bump();
880-
} else if self.curr_is('1') {
881-
if !self.nextch_is('6') {
882-
self.err_span_(self.last_pos, self.pos,
883-
"illegal int suffix");
884-
} else {
885-
self.bump(); self.bump();
886-
}
887-
} else if self.curr_is('3') {
888-
if !self.nextch_is('2') {
889-
self.err_span_(self.last_pos, self.pos,
890-
"illegal int suffix");
891-
} else {
892-
self.bump(); self.bump();
893-
}
894-
} else if self.curr_is('6') {
895-
if !self.nextch_is('4') {
896-
self.err_span_(self.last_pos, self.pos,
897-
"illegal int suffix");
898-
} else {
899-
self.bump(); self.bump();
900-
}
901-
}
902-
},
903-
_ => { }
904-
}
905-
}
906-
907-
/// Scan over a float literal suffix
908-
fn scan_float_suffix(&mut self) {
909-
if self.curr_is('f') {
910-
if (self.nextch_is('3') && self.nextnextch_is('2'))
911-
|| (self.nextch_is('6') && self.nextnextch_is('4')) {
912-
self.bump();
913-
self.bump();
914-
self.bump();
915-
} else {
916-
self.err_span_(self.last_pos, self.pos, "illegal float suffix");
917-
}
918-
}
919-
}
920-
921851
/// Scan over a float exponent.
922852
fn scan_float_exponent(&mut self) {
923853
if self.curr_is('e') || self.curr_is('E') {
@@ -988,6 +918,7 @@ impl<'a> StringReader<'a> {
988918
if is_dec_digit(c) {
989919
let num = self.scan_number(c.unwrap());
990920
let suffix = self.scan_optional_raw_name();
921+
debug!("next_token_inner: scanned number {}, {}", num, suffix);
991922
return token::Literal(num, suffix)
992923
}
993924

@@ -1609,6 +1540,9 @@ mod test {
16091540
test!("1.0", Float, "1.0");
16101541
test!("1.0e10", Float, "1.0e10");
16111542

1543+
assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok,
1544+
token::Literal(token::Integer(token::intern("2")),
1545+
Some(token::intern("u"))));
16121546
assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok,
16131547
token::Literal(token::StrRaw(token::intern("raw"), 3),
16141548
Some(token::intern("suffix"))));

src/libsyntax/parse/mod.rs

+76-69
Original file line numberDiff line numberDiff line change
@@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String {
511511
res
512512
}
513513

514-
pub fn float_lit(s: &str) -> ast::Lit_ {
515-
debug!("float_lit: {}", s);
516-
// FIXME #2252: bounds checking float literals is deferred until trans
517-
let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
518-
let s = s2.as_slice();
519-
520-
let mut ty = None;
521-
522-
if s.ends_with("f32") {
523-
ty = Some(ast::TyF32);
524-
} else if s.ends_with("f64") {
525-
ty = Some(ast::TyF64);
526-
}
514+
// check if `s` looks like i32 or u1234 etc.
515+
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
516+
s.len() > 1 &&
517+
first_chars.contains(&s.char_at(0)) &&
518+
s.slice_from(1).chars().all(|c| '0' <= c && c <= '9')
519+
}
527520

521+
fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>,
522+
sd: &SpanHandler, sp: Span) -> ast::Lit_ {
523+
debug!("filtered_float_lit: {}, {}", data, suffix);
524+
match suffix {
525+
Some("f32") => ast::LitFloat(data, ast::TyF32),
526+
Some("f64") => ast::LitFloat(data, ast::TyF64),
527+
Some(suf) => {
528+
if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
529+
// if it looks like a width, let's try to be helpful.
530+
sd.span_err(sp, &*format!("illegal width `{}` for float literal, \
531+
valid widths are 32 and 64", suf.slice_from(1)));
532+
} else {
533+
sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \
534+
valid suffixes are `f32` and `f64`", suf));
535+
}
528536

529-
match ty {
530-
Some(t) => {
531-
ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
532-
},
533-
None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
537+
ast::LitFloatUnsuffixed(data)
538+
}
539+
None => ast::LitFloatUnsuffixed(data)
534540
}
535541
}
542+
pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
543+
debug!("float_lit: {}, {}", s, suffix);
544+
// FIXME #2252: bounds checking float literals is deferred until trans
545+
let s = s.chars().filter(|&c| c != '_').collect::<String>();
546+
let data = token::intern_and_get_ident(&*s);
547+
filtered_float_lit(data, suffix, sd, sp)
548+
}
536549

537550
/// Parse a string representing a byte literal into its final form. Similar to `char_lit`
538551
pub fn byte_lit(lit: &str) -> (u8, uint) {
@@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
626639
Rc::new(res)
627640
}
628641

629-
pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
642+
pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
630643
// s can only be ascii, byte indexing is fine
631644

632645
let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
633646
let mut s = s2.as_slice();
634647

635-
debug!("parse_integer_lit: {}", s);
636-
637-
if s.len() == 1 {
638-
let n = (s.char_at(0)).to_digit(10).unwrap();
639-
return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n)));
640-
}
648+
debug!("integer_lit: {}, {}", s, suffix);
641649

642650
let mut base = 10;
643651
let orig = s;
644652
let mut ty = ast::UnsuffixedIntLit(ast::Plus);
645653

646-
if s.char_at(0) == '0' {
654+
if s.char_at(0) == '0' && s.len() > 1 {
647655
match s.char_at(1) {
648656
'x' => base = 16,
649657
'o' => base = 8,
@@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
652660
}
653661
}
654662

663+
// 1f64 and 2f32 etc. are valid float literals.
664+
match suffix {
665+
Some(suf) if looks_like_width_suffix(&['f'], suf) => {
666+
match base {
667+
16u => sd.span_err(sp, "hexadecimal float literal is not supported"),
668+
8u => sd.span_err(sp, "octal float literal is not supported"),
669+
2u => sd.span_err(sp, "binary float literal is not supported"),
670+
_ => ()
671+
}
672+
let ident = token::intern_and_get_ident(&*s);
673+
return filtered_float_lit(ident, suffix, sd, sp)
674+
}
675+
_ => {}
676+
}
677+
655678
if base != 10 {
656679
s = s.slice_from(2);
657680
}
658681

659-
let last = s.len() - 1;
660-
match s.char_at(last) {
661-
'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus),
662-
'u' => ty = ast::UnsignedIntLit(ast::TyU),
663-
'8' => {
664-
if s.len() > 2 {
665-
match s.char_at(last - 1) {
666-
'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus),
667-
'u' => ty = ast::UnsignedIntLit(ast::TyU8),
668-
_ => { }
669-
}
670-
}
671-
},
672-
'6' => {
673-
if s.len() > 3 && s.char_at(last - 1) == '1' {
674-
match s.char_at(last - 2) {
675-
'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus),
676-
'u' => ty = ast::UnsignedIntLit(ast::TyU16),
677-
_ => { }
678-
}
679-
}
680-
},
681-
'2' => {
682-
if s.len() > 3 && s.char_at(last - 1) == '3' {
683-
match s.char_at(last - 2) {
684-
'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus),
685-
'u' => ty = ast::UnsignedIntLit(ast::TyU32),
686-
_ => { }
687-
}
688-
}
689-
},
690-
'4' => {
691-
if s.len() > 3 && s.char_at(last - 1) == '6' {
692-
match s.char_at(last - 2) {
693-
'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus),
694-
'u' => ty = ast::UnsignedIntLit(ast::TyU64),
695-
_ => { }
682+
if let Some(suf) = suffix {
683+
if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")}
684+
ty = match suf {
685+
"i" => ast::SignedIntLit(ast::TyI, ast::Plus),
686+
"i8" => ast::SignedIntLit(ast::TyI8, ast::Plus),
687+
"i16" => ast::SignedIntLit(ast::TyI16, ast::Plus),
688+
"i32" => ast::SignedIntLit(ast::TyI32, ast::Plus),
689+
"i64" => ast::SignedIntLit(ast::TyI64, ast::Plus),
690+
"u" => ast::UnsignedIntLit(ast::TyU),
691+
"u8" => ast::UnsignedIntLit(ast::TyU8),
692+
"u16" => ast::UnsignedIntLit(ast::TyU16),
693+
"u32" => ast::UnsignedIntLit(ast::TyU32),
694+
"u64" => ast::UnsignedIntLit(ast::TyU64),
695+
_ => {
696+
// i<digits> and u<digits> look like widths, so let's
697+
// give an error message along those lines
698+
if looks_like_width_suffix(&['i', 'u'], suf) {
699+
sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \
700+
valid widths are 8, 16, 32 and 64",
701+
suf.slice_from(1)));
702+
} else {
703+
sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf));
696704
}
705+
706+
ty
697707
}
698-
},
699-
_ => { }
708+
}
700709
}
701710

702-
debug!("The suffix is {}, base {}, the new string is {}, the original \
703-
string was {}", ty, base, s, orig);
704-
705-
s = s.slice_to(s.len() - ty.suffix_len());
711+
debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \
712+
string was {}, the original suffix was {}", ty, base, s, orig, suffix);
706713

707714
let res: u64 = match ::std::num::from_str_radix(s, base) {
708715
Some(r) => r,

src/libsyntax/parse/parser.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -652,9 +652,9 @@ impl<'a> Parser<'a> {
652652
Some(suf) => {
653653
let text = suf.as_str();
654654
if text.is_empty() {
655-
self.span_bug(sp, "found empty non-None literal suffix")
655+
self.span_bug(sp, "found empty literal suffix in Some")
656656
}
657-
self.span_err(sp, &*format!("a {} with a suffix is illegal", kind));
657+
self.span_err(sp, &*format!("{} with a suffix is illegal", kind));
658658
}
659659
}
660660
}
@@ -1661,10 +1661,23 @@ impl<'a> Parser<'a> {
16611661
let (suffix_illegal, out) = match lit {
16621662
token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())),
16631663
token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())),
1664-
token::Integer(s) => (false, parse::integer_lit(s.as_str(),
1665-
&self.sess.span_diagnostic,
1666-
self.last_span)),
1667-
token::Float(s) => (false, parse::float_lit(s.as_str())),
1664+
1665+
// there are some valid suffixes for integer and
1666+
// float literals, so all the handling is done
1667+
// internally.
1668+
token::Integer(s) => {
1669+
(false, parse::integer_lit(s.as_str(),
1670+
suf.as_ref().map(|s| s.as_str()),
1671+
&self.sess.span_diagnostic,
1672+
self.last_span))
1673+
}
1674+
token::Float(s) => {
1675+
(false, parse::float_lit(s.as_str(),
1676+
suf.as_ref().map(|s| s.as_str()),
1677+
&self.sess.span_diagnostic,
1678+
self.last_span))
1679+
}
1680+
16681681
token::Str_(s) => {
16691682
(true,
16701683
LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),

src/test/compile-fail/bad-lit-suffixes.rs

+9-4
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,13 @@ fn main() {
2929
'a'suffix; //~ ERROR char literal with a suffix is illegal
3030
b'a'suffix; //~ ERROR byte literal with a suffix is illegal
3131

32-
1234suffix;
33-
0b101suffix;
34-
1.0suffix;
35-
1.0e10suffix;
32+
1234u1024; //~ ERROR illegal width `1024` for integer literal
33+
1234i1024; //~ ERROR illegal width `1024` for integer literal
34+
1234f1024; //~ ERROR illegal width `1024` for float literal
35+
1234.5f1024; //~ ERROR illegal width `1024` for float literal
36+
37+
1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal
38+
0b101suffix; //~ ERROR illegal suffix `suffix` for numeric literal
39+
1.0suffix; //~ ERROR illegal suffix `suffix` for float literal
40+
1.0e10suffix; //~ ERROR illegal suffix `suffix` for float literal
3641
}

0 commit comments

Comments
 (0)