
Commit 6679595

Parse and store suffixes on literals.
This adds an optional suffix at the end of a literal token, e.g. `"foo"bar`. Any actual use of a suffix in an expression (or in any other literal that the compiler reads) is rejected in the parser. This doesn't yet switch the handling of numbers over to this system, and doesn't yet outlaw illegal suffixes for them.
1 parent ff0278b · commit 6679595
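For illustration, a rough sketch of the token shape this commit introduces, written in the style of the new lexer tests below (setup, mk_sh and token::intern are the test helpers from src/libsyntax/parse/lexer/mod.rs; the input strings here are assumed examples, not ones taken from the commit):

    // Sketch only: `"foo"bar` should lex to a single Literal token whose
    // second field carries the interned suffix name.
    assert_eq!(setup(&mk_sh(), "\"foo\"bar".to_string()).next_token().tok,
               token::Literal(token::Str_(token::intern("foo")),
                              Some(token::intern("bar"))));
    // With whitespace before the identifier, no suffix is attached.
    assert_eq!(setup(&mk_sh(), "\"foo\" bar".to_string()).next_token().tok,
               token::Literal(token::Str_(token::intern("foo")), None));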

9 files changed: +268 −97 lines


src/librustdoc/html/highlight.rs

Lines changed: 11 additions & 7 deletions
@@ -128,13 +128,17 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
         }
 
-            // text literals
-            token::Literal(token::Byte(..)) | token::Literal(token::Char(..)) |
-            token::Literal(token::Binary(..)) | token::Literal(token::BinaryRaw(..)) |
-            token::Literal(token::Str_(..)) | token::Literal(token::StrRaw(..)) => "string",
-
-            // number literals
-            token::Literal(token::Integer(..)) | token::Literal(token::Float(..)) => "number",
+            token::Literal(lit, _suf) => {
+                match lit {
+                    // text literals
+                    token::Byte(..) | token::Char(..) |
+                    token::Binary(..) | token::BinaryRaw(..) |
+                    token::Str_(..) | token::StrRaw(..) => "string",
+
+                    // number literals
+                    token::Integer(..) | token::Float(..) => "number",
+                }
+            }
 
             // keywords are also included in the identifier set
             token::Ident(ident, _is_mod_sep) => {

src/libsyntax/ast.rs

Lines changed: 1 addition & 1 deletion
@@ -838,7 +838,7 @@ impl TokenTree {
                 tts: vec![TtToken(sp, token::Ident(token::str_to_ident("doc"),
                                                    token::Plain)),
                           TtToken(sp, token::Eq),
-                          TtToken(sp, token::Literal(token::Str_(name)))],
+                          TtToken(sp, token::Literal(token::Str_(name), None))],
                 close_span: sp,
             }))
         }

src/libsyntax/diagnostics/plugin.rs

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ pub fn expand_register_diagnostic<'cx>(ecx: &'cx mut ExtCtxt,
         },
         [ast::TtToken(_, token::Ident(ref code, _)),
          ast::TtToken(_, token::Comma),
-         ast::TtToken(_, token::Literal(token::StrRaw(description, _)))] => {
+         ast::TtToken(_, token::Literal(token::StrRaw(description, _), None))] => {
             (code, Some(description))
         }
         _ => unreachable!()

src/libsyntax/ext/quote.rs

Lines changed: 18 additions & 15 deletions
@@ -543,10 +543,13 @@ fn mk_delim(cx: &ExtCtxt, sp: Span, delim: token::DelimToken) -> P<ast::Expr> {
 #[allow(non_upper_case_globals)]
 fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P<ast::Expr> {
     macro_rules! mk_lit {
-        ($name: expr, $($args: expr),*) => {{
+        ($name: expr, $suffix: expr, $($args: expr),*) => {{
             let inner = cx.expr_call(sp, mk_token_path(cx, sp, $name), vec![$($args),*]);
-
-            cx.expr_call(sp, mk_token_path(cx, sp, "Literal"), vec![inner])
+            let suffix = match $suffix {
+                Some(name) => cx.expr_some(sp, mk_name(cx, sp, ast::Ident::new(name))),
+                None => cx.expr_none(sp)
+            };
+            cx.expr_call(sp, mk_token_path(cx, sp, "Literal"), vec![inner, suffix])
         }}
     }
     match *tok {
@@ -567,32 +570,32 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P<ast::Expr> {
                                 vec![mk_delim(cx, sp, delim)]);
         }
 
-        token::Literal(token::Byte(i)) => {
+        token::Literal(token::Byte(i), suf) => {
             let e_byte = mk_name(cx, sp, i.ident());
-            return mk_lit!("Byte", e_byte);
+            return mk_lit!("Byte", suf, e_byte);
         }
 
-        token::Literal(token::Char(i)) => {
+        token::Literal(token::Char(i), suf) => {
             let e_char = mk_name(cx, sp, i.ident());
-            return mk_lit!("Char", e_char);
+            return mk_lit!("Char", suf, e_char);
         }
 
-        token::Literal(token::Integer(i)) => {
+        token::Literal(token::Integer(i), suf) => {
             let e_int = mk_name(cx, sp, i.ident());
-            return mk_lit!("Integer", e_int);
+            return mk_lit!("Integer", suf, e_int);
         }
 
-        token::Literal(token::Float(fident)) => {
+        token::Literal(token::Float(fident), suf) => {
             let e_fident = mk_name(cx, sp, fident.ident());
-            return mk_lit!("Float", e_fident);
+            return mk_lit!("Float", suf, e_fident);
         }
 
-        token::Literal(token::Str_(ident)) => {
-            return mk_lit!("Str_", mk_name(cx, sp, ident.ident()))
+        token::Literal(token::Str_(ident), suf) => {
+            return mk_lit!("Str_", suf, mk_name(cx, sp, ident.ident()))
         }
 
-        token::Literal(token::StrRaw(ident, n)) => {
-            return mk_lit!("StrRaw", mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n))
+        token::Literal(token::StrRaw(ident, n), suf) => {
+            return mk_lit!("StrRaw", suf, mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n))
         }
 
         token::Ident(ident, style) => {

src/libsyntax/parse/lexer/mod.rs

Lines changed: 84 additions & 28 deletions
@@ -369,6 +369,25 @@ impl<'a> StringReader<'a> {
         self.nextnextch() == Some(c)
     }
 
+    /// Eats <XID_start><XID_continue>*, if possible.
+    fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
+        if !ident_start(self.curr) {
+            return None
+        }
+        let start = self.last_pos;
+        while ident_continue(self.curr) {
+            self.bump();
+        }
+
+        self.with_str_from(start, |string| {
+            if string == "_" {
+                None
+            } else {
+                Some(token::intern(string))
+            }
+        })
+    }
+
     /// PRECONDITION: self.curr is not whitespace
     /// Eats any kind of comment.
     fn scan_comment(&mut self) -> Option<TokenAndSpan> {
@@ -638,7 +657,7 @@ impl<'a> StringReader<'a> {
     }
 
     /// Lex a LIT_INTEGER or a LIT_FLOAT
-    fn scan_number(&mut self, c: char) -> token::Token {
+    fn scan_number(&mut self, c: char) -> token::Lit {
         let mut num_digits;
         let mut base = 10;
         let start_bpos = self.last_pos;
@@ -655,17 +674,17 @@ impl<'a> StringReader<'a> {
                 }
                 'u' | 'i' => {
                     self.scan_int_suffix();
-                    return token::Literal(token::Integer(self.name_from(start_bpos)));
+                    return token::Integer(self.name_from(start_bpos));
                 },
                 'f' => {
                     let last_pos = self.last_pos;
                     self.scan_float_suffix();
                     self.check_float_base(start_bpos, last_pos, base);
-                    return token::Literal(token::Float(self.name_from(start_bpos)));
+                    return token::Float(self.name_from(start_bpos));
                 }
                 _ => {
                     // just a 0
-                    return token::Literal(token::Integer(self.name_from(start_bpos)));
+                    return token::Integer(self.name_from(start_bpos));
                 }
             }
         } else if c.is_digit_radix(10) {
@@ -678,7 +697,7 @@ impl<'a> StringReader<'a> {
             self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
             // eat any suffix
             self.scan_int_suffix();
-            return token::Literal(token::Integer(token::intern("0")));
+            return token::Integer(token::intern("0"));
         }
 
         // might be a float, but don't be greedy if this is actually an
@@ -696,25 +715,25 @@ impl<'a> StringReader<'a> {
             }
             let last_pos = self.last_pos;
             self.check_float_base(start_bpos, last_pos, base);
-            return token::Literal(token::Float(self.name_from(start_bpos)));
+            return token::Float(self.name_from(start_bpos));
         } else if self.curr_is('f') {
             // or it might be an integer literal suffixed as a float
             self.scan_float_suffix();
             let last_pos = self.last_pos;
             self.check_float_base(start_bpos, last_pos, base);
-            return token::Literal(token::Float(self.name_from(start_bpos)));
+            return token::Float(self.name_from(start_bpos));
         } else {
             // it might be a float if it has an exponent
             if self.curr_is('e') || self.curr_is('E') {
                 self.scan_float_exponent();
                 self.scan_float_suffix();
                 let last_pos = self.last_pos;
                 self.check_float_base(start_bpos, last_pos, base);
-                return token::Literal(token::Float(self.name_from(start_bpos)));
+                return token::Float(self.name_from(start_bpos));
             }
             // but we certainly have an integer!
             self.scan_int_suffix();
-            return token::Literal(token::Integer(self.name_from(start_bpos)));
+            return token::Integer(self.name_from(start_bpos));
         }
     }
 
@@ -967,7 +986,9 @@ impl<'a> StringReader<'a> {
         }
 
         if is_dec_digit(c) {
-            return self.scan_number(c.unwrap());
+            let num = self.scan_number(c.unwrap());
+            let suffix = self.scan_optional_raw_name();
+            return token::Literal(num, suffix)
         }
 
         if self.read_embedded_ident {
@@ -1126,17 +1147,19 @@ impl<'a> StringReader<'a> {
                 }
                 let id = if valid { self.name_from(start) } else { token::intern("0") };
                 self.bump(); // advance curr past token
-                return token::Literal(token::Char(id));
+                let suffix = self.scan_optional_raw_name();
+                return token::Literal(token::Char(id), suffix);
             }
             'b' => {
                 self.bump();
-                return match self.curr {
+                let lit = match self.curr {
                     Some('\'') => self.scan_byte(),
                     Some('"') => self.scan_byte_string(),
                     Some('r') => self.scan_raw_byte_string(),
                     _ => unreachable!() // Should have been a token::Ident above.
                 };
-
+                let suffix = self.scan_optional_raw_name();
+                return token::Literal(lit, suffix);
             }
             '"' => {
                 let start_bpos = self.last_pos;
@@ -1157,7 +1180,8 @@ impl<'a> StringReader<'a> {
                 let id = if valid { self.name_from(start_bpos + BytePos(1)) }
                          else { token::intern("??") };
                 self.bump();
-                return token::Literal(token::Str_(id));
+                let suffix = self.scan_optional_raw_name();
+                return token::Literal(token::Str_(id), suffix);
             }
             'r' => {
                 let start_bpos = self.last_pos;
@@ -1224,7 +1248,8 @@ impl<'a> StringReader<'a> {
                 } else {
                     token::intern("??")
                 };
-                return token::Literal(token::StrRaw(id, hash_count));
+                let suffix = self.scan_optional_raw_name();
+                return token::Literal(token::StrRaw(id, hash_count), suffix);
             }
             '-' => {
                 if self.nextch_is('>') {
@@ -1293,7 +1318,7 @@ impl<'a> StringReader<'a> {
             || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
     }
 
-    fn scan_byte(&mut self) -> token::Token {
+    fn scan_byte(&mut self) -> token::Lit {
         self.bump();
         let start = self.last_pos;
 
@@ -1314,10 +1339,10 @@ impl<'a> StringReader<'a> {
 
         let id = if valid { self.name_from(start) } else { token::intern("??") };
         self.bump(); // advance curr past token
-        return token::Literal(token::Byte(id));
+        return token::Byte(id);
     }
 
-    fn scan_byte_string(&mut self) -> token::Token {
+    fn scan_byte_string(&mut self) -> token::Lit {
         self.bump();
         let start = self.last_pos;
         let mut valid = true;
@@ -1336,10 +1361,10 @@ impl<'a> StringReader<'a> {
         }
         let id = if valid { self.name_from(start) } else { token::intern("??") };
         self.bump();
-        return token::Literal(token::Binary(id));
+        return token::Binary(id);
     }
 
-    fn scan_raw_byte_string(&mut self) -> token::Token {
+    fn scan_raw_byte_string(&mut self) -> token::Lit {
         let start_bpos = self.last_pos;
         self.bump();
         let mut hash_count = 0u;
@@ -1387,9 +1412,9 @@ impl<'a> StringReader<'a> {
             self.bump();
         }
         self.bump();
-        return token::Literal(token::BinaryRaw(self.name_from_to(content_start_bpos,
-                                                                 content_end_bpos),
-                                               hash_count));
+        return token::BinaryRaw(self.name_from_to(content_start_bpos,
+                                                  content_end_bpos),
+                                hash_count);
     }
 }
 
@@ -1536,17 +1561,17 @@ mod test {
 
     #[test] fn character_a() {
         assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok,
-                   token::Literal(token::Char(token::intern("a"))));
+                   token::Literal(token::Char(token::intern("a")), None));
     }
 
     #[test] fn character_space() {
         assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok,
-                   token::Literal(token::Char(token::intern(" "))));
+                   token::Literal(token::Char(token::intern(" ")), None));
     }
 
     #[test] fn character_escaped() {
         assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok,
-                   token::Literal(token::Char(token::intern("\\n"))));
+                   token::Literal(token::Char(token::intern("\\n")), None));
     }
 
     #[test] fn lifetime_name() {
@@ -1558,7 +1583,38 @@ mod test {
         assert_eq!(setup(&mk_sh(),
                          "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token()
                                                                  .tok,
-                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3)));
+                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
+    }
+
+    #[test] fn literal_suffixes() {
+        macro_rules! test {
+            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
+                assert_eq!(setup(&mk_sh(), format!("{}suffix", $input)).next_token().tok,
+                           token::Literal(token::$tok_type(token::intern($tok_contents)),
+                                          Some(token::intern("suffix"))));
+                // with a whitespace separator:
+                assert_eq!(setup(&mk_sh(), format!("{} suffix", $input)).next_token().tok,
+                           token::Literal(token::$tok_type(token::intern($tok_contents)),
+                                          None));
+            }}
+        }
+
+        test!("'a'", Char, "a");
+        test!("b'a'", Byte, "a");
+        test!("\"a\"", Str_, "a");
+        test!("b\"a\"", Binary, "a");
+        test!("1234", Integer, "1234");
+        test!("0b101", Integer, "0b101");
+        test!("0xABC", Integer, "0xABC");
+        test!("1.0", Float, "1.0");
+        test!("1.0e10", Float, "1.0e10");
+
+        assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok,
+                   token::Literal(token::StrRaw(token::intern("raw"), 3),
+                                  Some(token::intern("suffix"))));
+        assert_eq!(setup(&mk_sh(), "br###\"raw\"###suffix".to_string()).next_token().tok,
+                   token::Literal(token::BinaryRaw(token::intern("raw"), 3),
+                                  Some(token::intern("suffix"))));
     }
 
     #[test] fn line_doc_comments() {
@@ -1574,7 +1630,7 @@ mod test {
             token::Comment => { },
             _ => panic!("expected a comment!")
         }
-        assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a"))));
+        assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None));
     }
 
 }
