Skip to content

Fix the bug of next_point in source_map #103185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions compiler/rustc_expand/src/expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -937,13 +937,12 @@ pub fn ensure_complete_parse<'a>(
kind_name,
);
err.note(&msg);
let semi_span = this.sess.source_map().next_point(span);

let semi_full_span = semi_span.to(this.sess.source_map().next_point(semi_span));
match this.sess.source_map().span_to_snippet(semi_full_span) {
let semi_span = this.sess.source_map().next_point(span);
match this.sess.source_map().span_to_snippet(semi_span) {
Ok(ref snippet) if &snippet[..] != ";" && kind_name == "expression" => {
err.span_suggestion(
semi_span,
span.shrink_to_hi(),
"you might be missing a semicolon here",
";",
Applicability::MaybeIncorrect,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_expand/src/mbe/macro_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ fn emit_frag_parse_err(
);
if !e.span.is_dummy() {
// early end of macro arm (#52866)
e.replace_span_with(parser.sess.source_map().next_point(parser.token.span));
e.replace_span_with(parser.token.span.shrink_to_hi());
}
}
if e.span.is_dummy() {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,7 @@ impl<'a> Parser<'a> {
let (prev_sp, sp) = match (&self.token.kind, self.subparser_name) {
// Point at the end of the macro call when reaching end of macro arguments.
(token::Eof, Some(_)) => {
let sp = self.sess.source_map().next_point(self.prev_token.span);
let sp = self.prev_token.span.shrink_to_hi();
(sp, sp)
}
// We don't want to point at the following span after DUMMY_SP.
Expand Down Expand Up @@ -2039,7 +2039,7 @@ impl<'a> Parser<'a> {
pub(super) fn expected_expression_found(&self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
let (span, msg) = match (&self.token.kind, self.subparser_name) {
(&token::Eof, Some(origin)) => {
let sp = self.sess.source_map().next_point(self.prev_token.span);
let sp = self.prev_token.span.shrink_to_hi();
(sp, format!("expected expression, found end of {origin}"))
}
_ => (
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2172,7 +2172,7 @@ impl<'a> Parser<'a> {
},
ExprKind::Block(_, None) => {
self.sess.emit_err(IfExpressionMissingCondition {
if_span: self.sess.source_map().next_point(lo),
if_span: lo.shrink_to_hi(),
block_span: self.sess.source_map().start_point(cond_span),
});
std::mem::replace(&mut cond, this.mk_expr_err(cond_span.shrink_to_hi()))
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1601,7 +1601,7 @@ impl<'a> Parser<'a> {
self.sess.emit_err(err);
} else {
if !seen_comma {
let sp = self.sess.source_map().next_point(previous_span);
let sp = previous_span.shrink_to_hi();
err.missing_comma = Some(sp);
}
return Err(err.into_diagnostic(&self.sess.span_diagnostic));
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_resolve/src/late/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1731,7 +1731,7 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
for _ in 0..100 {
// Try to find an assignment
sp = sm.next_point(sp);
let snippet = sm.span_to_snippet(sp.to(sm.next_point(sp)));
let snippet = sm.span_to_snippet(sp);
match snippet {
Ok(ref x) if x.as_str() == "=" => {
err.span_suggestion(
Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_span/src/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -859,14 +859,15 @@ impl SourceMap {
}
let start_of_next_point = sp.hi().0;

let width = self.find_width_of_character_at_span(sp.shrink_to_hi(), true);
let width = self.find_width_of_character_at_span(sp, true);
debug_assert!(width > 0);
// If the width is 1, then the next span should point to the same `lo` and `hi`. However,
// in the case of a multibyte character, where the width != 1, the next span should
// span multiple bytes to include the whole character.
let end_of_next_point =
start_of_next_point.checked_add(width - 1).unwrap_or(start_of_next_point);
start_of_next_point.checked_add(width).unwrap_or(start_of_next_point);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've always found this bit confusing, but I'm wondering if this comment is now out-of-date?

The next_point() function used to return an empty span (when the "width is 1" as in the comment) pointing just before the next character. This change seems to change it so that it is a non-empty span containing the next character.

I'm also wondering if the name of the function is also a bit misleading? The original next_point function always returned an empty span (a "point", not a region). This was changed in #47420 to have the somewhat confusing behavior where an empty span would move forward to span the next character (non-empty), and a non-empty span would return an empty span pointing just past the end. It's not clear from that PR why that behavior was chosen.

I also think this function is still somewhat buggy with respect to multi-byte characters. Below is a unittest which illustrates some oddities. Perhaps it might be good to add this unittest?

/// Exercises `SourceMap::next_point` on the source `"a…b"`, which contains a
/// multi-byte character, using both empty and non-empty input spans.
///
/// The assertions appear to pin the values `next_point` currently returns rather
/// than the desired ones: every assertion tagged `ERROR` is a result the author
/// considers wrong, with the value they would expect noted alongside.
#[test]
fn next_point() {
    let sm = SourceMap::new(FilePathMapping::empty());
    // Byte layout the positions below rely on: `a` is byte 0, `…` occupies
    // bytes 1..4 (three bytes in UTF-8), `b` is byte 4; the file is 5 bytes long.
    sm.new_source_file(PathBuf::from("example.rs").into(), "a…b".to_string());
    // Case 1: empty span at the very start of the file.
    let span = Span::with_root_ctxt(BytePos(0), BytePos(0));
    // This confusingly does not advance the span?
    // NOTE(review): a (0, 0) root-context span may be treated as a dummy span,
    // which could explain why it comes back unchanged — confirm.
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 0);
    assert_eq!(span.hi().0, 0); // ERROR: This should probably be 1?

    // Case 2: non-empty span covering `a`; the result covers the whole `…`.
    let span = Span::with_root_ctxt(BytePos(0), BytePos(1));
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 1);
    assert_eq!(span.hi().0, 4);

    // Case 3: empty span sitting just before `…`.
    // This creates an invalid span, slicing in the middle of a multi-byte char?
    let span = Span::with_root_ctxt(BytePos(1), BytePos(1));
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 1);
    assert_eq!(span.hi().0, 2); // ERROR: This should probably be 4?

    // Case 4: non-empty span covering `…`; the result covers `b`.
    let span = Span::with_root_ctxt(BytePos(1), BytePos(4));
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 4);
    assert_eq!(span.hi().0, 5);
    // Case 5: advance once more from the span covering `b`, the last character.
    // This creates an invalid span, pointing past the end of the file?
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 5);
    assert_eq!(span.hi().0, 6); // ERROR: This should probably be 5?

    // Case 6: start directly at end-of-file.
    // Empty span pointing just past the last byte.
    let span = Span::with_root_ctxt(BytePos(5), BytePos(5));
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 5);
    assert_eq!(span.hi().0, 6); // ERROR: This should probably be 5?
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This unit test implied another issue:

// This creates an invalid span, slicing in the middle of a multi-byte char?
    let span = Span::with_root_ctxt(BytePos(1), BytePos(1));
    let span = sm.next_point(span);
    assert_eq!(span.lo().0, 1);
    assert_eq!(span.hi().0, 2);

When lo and hi are the same, we just return a width of 1:

if sp.lo == sp.hi {

So the result is that span.hi().0 will be 2.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @ehuss,

I fixed all other issues in the test cases, except for this one:

 let span = sm.next_point(span);
    assert_eq!(span.lo().0, 0);
    assert_eq!(span.hi().0, 0); // ERROR: This should probably be 1?

It returns the same span because it's a dummy span, something special for macros.


let end_of_next_point = BytePos(cmp::max(sp.lo().0 + 1, end_of_next_point));
let end_of_next_point = BytePos(cmp::max(start_of_next_point + 1, end_of_next_point));
Span::new(BytePos(start_of_next_point), end_of_next_point, sp.ctxt(), None)
}

Expand Down