Skip to content

Commit 7f826cb

Browse files
committed
auto merge of #9308 : ben0x539/rust/lexer-error-spans, r=alexcrichton
Previously, the lexer calling `rdr.fatal(...)` would report the span of the last complete token, instead of a span within the erroneous token (besides one span fixed in 1ac90bb). This branch adds wrappers around `rdr.fatal(...)` that set the span explicitly, so that all fatal errors in `libsyntax/parse/lexer.rs` now report the offending code more precisely. A number of tests try to verify that, though the `compile-fail` testing setup can only check that the spans are on the right lines, and the "unterminated string/block comment" errors can't have the line marked at all, so that's incomplete. This closes #9149. Also, the lexer errors now report the offending code in the error message, not just via the span, just like other errors do.
2 parents 407d179 + 567c567 commit 7f826cb

13 files changed

+247
-26
lines changed

src/libsyntax/parse/lexer.rs

Lines changed: 91 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,46 @@ impl reader for TtReader {
149149
fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader }
150150
}
151151

152+
// report a lexical error spanning [`from_pos`, `to_pos`)
153+
fn fatal_span(rdr: @mut StringReader,
154+
from_pos: BytePos,
155+
to_pos: BytePos,
156+
m: ~str)
157+
-> ! {
158+
rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
159+
rdr.fatal(m);
160+
}
161+
162+
// report a lexical error spanning [`from_pos`, `to_pos`), appending an
163+
// escaped character to the error message
164+
fn fatal_span_char(rdr: @mut StringReader,
165+
from_pos: BytePos,
166+
to_pos: BytePos,
167+
m: ~str,
168+
c: char)
169+
-> ! {
170+
let mut m = m;
171+
m.push_str(": ");
172+
char::escape_default(c, |c| m.push_char(c));
173+
fatal_span(rdr, from_pos, to_pos, m);
174+
}
175+
176+
// report a lexical error spanning [`from_pos`, `to_pos`), appending the
177+
// offending string to the error message
178+
fn fatal_span_verbose(rdr: @mut StringReader,
179+
from_pos: BytePos,
180+
to_pos: BytePos,
181+
m: ~str)
182+
-> ! {
183+
let mut m = m;
184+
m.push_str(": ");
185+
let s = rdr.src.slice(
186+
byte_offset(rdr, from_pos).to_uint(),
187+
byte_offset(rdr, to_pos).to_uint());
188+
m.push_str(s);
189+
fatal_span(rdr, from_pos, to_pos, m);
190+
}
191+
152192
// EFFECT: advance peek_tok and peek_span to refer to the next token.
153193
// EFFECT: update the interner, maybe.
154194
fn string_advance_token(r: @mut StringReader) {
@@ -327,7 +367,8 @@ fn consume_block_comment(rdr: @mut StringReader)
327367
bump(rdr);
328368
}
329369
if is_eof(rdr) {
330-
rdr.fatal(~"unterminated block doc-comment");
370+
fatal_span(rdr, start_bpos, rdr.last_pos,
371+
~"unterminated block doc-comment");
331372
} else {
332373
bump(rdr);
333374
bump(rdr);
@@ -344,8 +385,12 @@ fn consume_block_comment(rdr: @mut StringReader)
344385
}
345386
}
346387
} else {
388+
let start_bpos = rdr.last_pos - BytePos(2u);
347389
loop {
348-
if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); }
390+
if is_eof(rdr) {
391+
fatal_span(rdr, start_bpos, rdr.last_pos,
392+
~"unterminated block comment");
393+
}
349394
if rdr.curr == '*' && nextch(rdr) == '/' {
350395
bump(rdr);
351396
bump(rdr);
@@ -361,7 +406,7 @@ fn consume_block_comment(rdr: @mut StringReader)
361406
if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
362407
}
363408

364-
fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
409+
fn scan_exponent(rdr: @mut StringReader, start_bpos: BytePos) -> Option<~str> {
365410
let mut c = rdr.curr;
366411
let mut rslt = ~"";
367412
if c == 'e' || c == 'E' {
@@ -375,7 +420,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
375420
let exponent = scan_digits(rdr, 10u);
376421
if exponent.len() > 0u {
377422
return Some(rslt + exponent);
378-
} else { rdr.fatal(~"scan_exponent: bad fp literal"); }
423+
} else {
424+
fatal_span(rdr, start_bpos, rdr.last_pos,
425+
~"scan_exponent: bad fp literal");
426+
}
379427
} else { return None::<~str>; }
380428
}
381429

@@ -399,6 +447,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
399447
let mut base = 10u;
400448
let mut c = c;
401449
let mut n = nextch(rdr);
450+
let start_bpos = rdr.last_pos;
402451
if c == '0' && n == 'x' {
403452
bump(rdr);
404453
bump(rdr);
@@ -442,11 +491,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
442491
else { either::Right(ast::ty_u64) };
443492
}
444493
if num_str.len() == 0u {
445-
rdr.fatal(~"no valid digits found for number");
494+
fatal_span(rdr, start_bpos, rdr.last_pos,
495+
~"no valid digits found for number");
446496
}
447497
let parsed = match from_str_radix::<u64>(num_str, base as uint) {
448498
Some(p) => p,
449-
None => rdr.fatal(~"int literal is too large")
499+
None => fatal_span(rdr, start_bpos, rdr.last_pos,
500+
~"int literal is too large")
450501
};
451502

452503
match tp {
@@ -464,12 +515,14 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
464515
}
465516
if is_float {
466517
match base {
467-
16u => rdr.fatal(~"hexadecimal float literal is not supported"),
468-
2u => rdr.fatal(~"binary float literal is not supported"),
518+
16u => fatal_span(rdr, start_bpos, rdr.last_pos,
519+
~"hexadecimal float literal is not supported"),
520+
2u => fatal_span(rdr, start_bpos, rdr.last_pos,
521+
~"binary float literal is not supported"),
469522
_ => ()
470523
}
471524
}
472-
match scan_exponent(rdr) {
525+
match scan_exponent(rdr, start_bpos) {
473526
Some(ref s) => {
474527
is_float = true;
475528
num_str.push_str(*s);
@@ -507,11 +560,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
507560
return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
508561
} else {
509562
if num_str.len() == 0u {
510-
rdr.fatal(~"no valid digits found for number");
563+
fatal_span(rdr, start_bpos, rdr.last_pos,
564+
~"no valid digits found for number");
511565
}
512566
let parsed = match from_str_radix::<u64>(num_str, base as uint) {
513567
Some(p) => p,
514-
None => rdr.fatal(~"int literal is too large")
568+
None => fatal_span(rdr, start_bpos, rdr.last_pos,
569+
~"int literal is too large")
515570
};
516571

517572
debug!("lexing %s as an unsuffixed integer literal",
@@ -523,19 +578,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
523578
fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char {
524579
let mut accum_int = 0;
525580
let mut i = n_hex_digits;
581+
let start_bpos = rdr.last_pos;
526582
while i != 0u {
527583
let n = rdr.curr;
528-
bump(rdr);
529584
if !is_hex_digit(n) {
530-
rdr.fatal(fmt!("illegal numeric character escape: %d", n as int));
585+
fatal_span_char(rdr, rdr.last_pos, rdr.pos,
586+
~"illegal character in numeric character escape",
587+
n);
531588
}
589+
bump(rdr);
532590
accum_int *= 16;
533591
accum_int += hex_digit_val(n);
534592
i -= 1u;
535593
}
536594
match char::from_u32(accum_int as u32) {
537595
Some(x) => x,
538-
None => rdr.fatal(fmt!("illegal numeric character escape"))
596+
None => fatal_span(rdr, start_bpos, rdr.last_pos,
597+
~"illegal numeric character escape")
539598
}
540599
}
541600

@@ -691,6 +750,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
691750
if c2 == '\\' {
692751
// '\X' for some X must be a character constant:
693752
let escaped = rdr.curr;
753+
let escaped_pos = rdr.last_pos;
694754
bump(rdr);
695755
match escaped {
696756
'n' => { c2 = '\n'; }
@@ -704,32 +764,39 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
704764
'u' => { c2 = scan_numeric_escape(rdr, 4u); }
705765
'U' => { c2 = scan_numeric_escape(rdr, 8u); }
706766
c2 => {
707-
rdr.fatal(fmt!("unknown character escape: %d", c2 as int));
767+
fatal_span_char(rdr, escaped_pos, rdr.last_pos,
768+
~"unknown character escape", c2);
708769
}
709770
}
710771
}
711772
if rdr.curr != '\'' {
712-
rdr.fatal(~"unterminated character constant");
773+
fatal_span_verbose(rdr,
774+
// Byte offsetting here is okay because the
775+
// character before position `start` is an
776+
// ascii single quote.
777+
start - BytePos(1u),
778+
rdr.last_pos,
779+
~"unterminated character constant");
713780
}
714781
bump(rdr); // advance curr past token
715782
return token::LIT_CHAR(c2 as u32);
716783
}
717784
'"' => {
718785
let mut accum_str = ~"";
719-
let n = rdr.last_pos;
786+
let start_bpos = rdr.last_pos;
720787
bump(rdr);
721788
while rdr.curr != '"' {
722789
if is_eof(rdr) {
723-
do with_str_from(rdr, n) |s| {
724-
rdr.fatal(fmt!("unterminated double quote string: %s", s));
725-
}
790+
fatal_span(rdr, start_bpos, rdr.last_pos,
791+
~"unterminated double quote string");
726792
}
727793

728794
let ch = rdr.curr;
729795
bump(rdr);
730796
match ch {
731797
'\\' => {
732798
let escaped = rdr.curr;
799+
let escaped_pos = rdr.last_pos;
733800
bump(rdr);
734801
match escaped {
735802
'n' => accum_str.push_char('\n'),
@@ -750,7 +817,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
750817
accum_str.push_char(scan_numeric_escape(rdr, 8u));
751818
}
752819
c2 => {
753-
rdr.fatal(fmt!("unknown string escape: %d", c2 as int));
820+
fatal_span_char(rdr, escaped_pos, rdr.last_pos,
821+
~"unknown string escape", c2);
754822
}
755823
}
756824
}
@@ -786,11 +854,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
786854
'^' => { return binop(rdr, token::CARET); }
787855
'%' => { return binop(rdr, token::PERCENT); }
788856
c => {
789-
// So the error span points to the unrecognized character
790-
rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos);
791-
let mut cs = ~"";
792-
char::escape_default(c, |c| cs.push_char(c));
793-
rdr.fatal(fmt!("unknown start of token: %s", cs));
857+
fatal_span_char(rdr, rdr.last_pos, rdr.pos,
858+
~"unknown start of token", c);
794859
}
795860
}
796861
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static f: float =
12+
1e+ //~ ERROR: scan_exponent: bad fp literal
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static f: float =
12+
0x539.0 //~ ERROR: hexadecimal float literal is not supported
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static c: char =
12+
'\Uffffffff' //~ ERROR: illegal numeric character escape
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static c: char =
12+
'\u539_' //~ ERROR: illegal character in numeric character escape
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static i: int =
12+
99999999999999999999999999999999u32 //~ ERROR: int literal is too large
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static i: int =
12+
99999999999999999999999999999999 //~ ERROR: int literal is too large
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static i: int =
12+
0xu32 //~ ERROR: no valid digits
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static i: int =
12+
0x //~ ERROR: no valid digits
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
static c: char =
12+
'\●' //~ ERROR: unknown character escape
13+
;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
fn main() {
12+
//~ ERROR: unknown start of token
13+
}

0 commit comments

Comments
 (0)