Skip to content

Commit 43062c4

Browse files
committed
Auto merge of rust-lang#112216 - est31:offset_of_deep_tuple, r=petrochenkov
Support float-like tuple indices in offset_of!()

Supports invocations like `offset_of!((((), ()), ()), 0.0)`. This `0.0` gets tokenized as a float literal, so it has to be broken up again. The code that did the breaking up was returning a finished `Expr`, while we need an `Ident`, so this PR splits up the `parse_expr_tuple_field_access_float` function into:

* a function that breaks up the float literal (similar to `TokenKind::break_two_token_op`, but we do access the parser during this splitting operation, so we keep it as an inherent function on the parser)
* and a function that constructs an `Expr` from it

The former we can then re-use in `offset_of` parsing.

The edge cases, especially those involving whitespace, are tricky, so this adds a bunch of new tests as well.

Fixes rust-lang#112204
2 parents 397641f + 9fb266b commit 43062c4

File tree

4 files changed

+390
-42
lines changed

4 files changed

+390
-42
lines changed

compiler/rustc_parse/src/parser/expr.rs

+92-24
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,18 @@ impl From<P<Expr>> for LhsExpr {
9191
}
9292
}
9393

94+
#[derive(Debug)]
95+
enum DestructuredFloat {
96+
/// 1e2
97+
Single(Symbol, Span),
98+
/// 1.
99+
TrailingDot(Symbol, Span, Span),
100+
/// 1.2 | 1.2e3
101+
MiddleDot(Symbol, Span, Span, Symbol, Span),
102+
/// Invalid
103+
Error,
104+
}
105+
94106
impl<'a> Parser<'a> {
95107
/// Parses an expression.
96108
#[inline]
@@ -1013,13 +1025,8 @@ impl<'a> Parser<'a> {
10131025
// support pushing "future tokens" (would be also helpful to `break_and_eat`), or
10141026
// we should break everything including floats into more basic proc-macro style
10151027
// tokens in the lexer (probably preferable).
1016-
fn parse_expr_tuple_field_access_float(
1017-
&mut self,
1018-
lo: Span,
1019-
base: P<Expr>,
1020-
float: Symbol,
1021-
suffix: Option<Symbol>,
1022-
) -> P<Expr> {
1028+
// See also `TokenKind::break_two_token_op` which does similar splitting of `>>` into `>`.
1029+
fn break_up_float(&mut self, float: Symbol) -> DestructuredFloat {
10231030
#[derive(Debug)]
10241031
enum FloatComponent {
10251032
IdentLike(String),
@@ -1056,7 +1063,7 @@ impl<'a> Parser<'a> {
10561063
match &*components {
10571064
// 1e2
10581065
[IdentLike(i)] => {
1059-
self.parse_expr_tuple_field_access(lo, base, Symbol::intern(&i), suffix, None)
1066+
DestructuredFloat::Single(Symbol::intern(&i), span)
10601067
}
10611068
// 1.
10621069
[IdentLike(i), Punct('.')] => {
@@ -1068,11 +1075,8 @@ impl<'a> Parser<'a> {
10681075
} else {
10691076
(span, span)
10701077
};
1071-
assert!(suffix.is_none());
10721078
let symbol = Symbol::intern(&i);
1073-
self.token = Token::new(token::Ident(symbol, false), ident_span);
1074-
let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
1075-
self.parse_expr_tuple_field_access(lo, base, symbol, None, Some(next_token))
1079+
DestructuredFloat::TrailingDot(symbol, ident_span, dot_span)
10761080
}
10771081
// 1.2 | 1.2e3
10781082
[IdentLike(i1), Punct('.'), IdentLike(i2)] => {
@@ -1088,16 +1092,8 @@ impl<'a> Parser<'a> {
10881092
(span, span, span)
10891093
};
10901094
let symbol1 = Symbol::intern(&i1);
1091-
self.token = Token::new(token::Ident(symbol1, false), ident1_span);
1092-
// This needs to be `Spacing::Alone` to prevent regressions.
1093-
// See issue #76399 and PR #76285 for more details
1094-
let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
1095-
let base1 =
1096-
self.parse_expr_tuple_field_access(lo, base, symbol1, None, Some(next_token1));
10971095
let symbol2 = Symbol::intern(&i2);
1098-
let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
1099-
self.bump_with((next_token2, self.token_spacing)); // `.`
1100-
self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None)
1096+
DestructuredFloat::MiddleDot(symbol1, ident1_span, dot_span, symbol2, ident2_span)
11011097
}
11021098
// 1e+ | 1e- (recovered)
11031099
[IdentLike(_), Punct('+' | '-')] |
@@ -1109,12 +1105,83 @@ impl<'a> Parser<'a> {
11091105
[IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => {
11101106
// See the FIXME about `TokenCursor` above.
11111107
self.error_unexpected_after_dot();
1112-
base
1108+
DestructuredFloat::Error
11131109
}
11141110
_ => panic!("unexpected components in a float token: {:?}", components),
11151111
}
11161112
}
11171113

1114+
fn parse_expr_tuple_field_access_float(
1115+
&mut self,
1116+
lo: Span,
1117+
base: P<Expr>,
1118+
float: Symbol,
1119+
suffix: Option<Symbol>,
1120+
) -> P<Expr> {
1121+
match self.break_up_float(float) {
1122+
// 1e2
1123+
DestructuredFloat::Single(sym, _sp) => {
1124+
self.parse_expr_tuple_field_access(lo, base, sym, suffix, None)
1125+
}
1126+
// 1.
1127+
DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => {
1128+
assert!(suffix.is_none());
1129+
self.token = Token::new(token::Ident(sym, false), ident_span);
1130+
let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
1131+
self.parse_expr_tuple_field_access(lo, base, sym, None, Some(next_token))
1132+
}
1133+
// 1.2 | 1.2e3
1134+
DestructuredFloat::MiddleDot(symbol1, ident1_span, dot_span, symbol2, ident2_span) => {
1135+
self.token = Token::new(token::Ident(symbol1, false), ident1_span);
1136+
// This needs to be `Spacing::Alone` to prevent regressions.
1137+
// See issue #76399 and PR #76285 for more details
1138+
let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
1139+
let base1 =
1140+
self.parse_expr_tuple_field_access(lo, base, symbol1, None, Some(next_token1));
1141+
let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
1142+
self.bump_with((next_token2, self.token_spacing)); // `.`
1143+
self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None)
1144+
}
1145+
DestructuredFloat::Error => base,
1146+
}
1147+
}
1148+
1149+
fn parse_field_name_maybe_tuple(&mut self) -> PResult<'a, ThinVec<Ident>> {
1150+
let token::Literal(token::Lit { kind: token::Float, symbol, suffix }) = self.token.kind
1151+
else {
1152+
return Ok(thin_vec![self.parse_field_name()?]);
1153+
};
1154+
Ok(match self.break_up_float(symbol) {
1155+
// 1e2
1156+
DestructuredFloat::Single(sym, sp) => {
1157+
self.bump();
1158+
thin_vec![Ident::new(sym, sp)]
1159+
}
1160+
// 1.
1161+
DestructuredFloat::TrailingDot(sym, sym_span, dot_span) => {
1162+
assert!(suffix.is_none());
1163+
// Analogous to `Self::break_and_eat`
1164+
self.token_cursor.break_last_token = true;
1165+
// This might work, in cases like `1. 2`, and might not,
1166+
// in cases like `offset_of!(Ty, 1.)`. It depends on what comes
1167+
// after the float-like token, and therefore we have to make
1168+
// the other parts of the parser think that there is a dot literal.
1169+
self.token = Token::new(token::Ident(sym, false), sym_span);
1170+
self.bump_with((Token::new(token::Dot, dot_span), self.token_spacing));
1171+
thin_vec![Ident::new(sym, sym_span)]
1172+
}
1173+
// 1.2 | 1.2e3
1174+
DestructuredFloat::MiddleDot(symbol1, ident1_span, _dot_span, symbol2, ident2_span) => {
1175+
self.bump();
1176+
thin_vec![Ident::new(symbol1, ident1_span), Ident::new(symbol2, ident2_span)]
1177+
}
1178+
DestructuredFloat::Error => {
1179+
self.bump();
1180+
thin_vec![Ident::new(symbol, self.prev_token.span)]
1181+
}
1182+
})
1183+
}
1184+
11181185
fn parse_expr_tuple_field_access(
11191186
&mut self,
11201187
lo: Span,
@@ -1821,10 +1888,11 @@ impl<'a> Parser<'a> {
18211888
let (fields, _trailing, _recovered) = self.parse_seq_to_before_end(
18221889
&TokenKind::CloseDelim(Delimiter::Parenthesis),
18231890
seq_sep,
1824-
Parser::parse_field_name,
1891+
Parser::parse_field_name_maybe_tuple,
18251892
)?;
1893+
let fields = fields.into_iter().flatten().collect::<Vec<_>>();
18261894
let span = lo.to(self.token.span);
1827-
Ok(self.mk_expr(span, ExprKind::OffsetOf(container, fields.to_vec().into())))
1895+
Ok(self.mk_expr(span, ExprKind::OffsetOf(container, fields.into())))
18281896
}
18291897

18301898
/// Returns a string literal if the next token is a string literal.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// run-pass
2+
// Test for issue #112204 -- make sure this goes through the entire compilation pipeline,
3+
// similar to why `offset-of-unsized.rs` is also build-pass
4+
5+
#![feature(offset_of)]
6+
#![feature(builtin_syntax)]
7+
8+
use std::mem::offset_of;
9+
10+
type ComplexTup = ((u8, (u8, (u8, u16), u8)), (u8, u32, u16));
11+
12+
fn main() {
13+
println!("{}", offset_of!(((u8, u8), u8), 0));
14+
println!("{}", offset_of!(((u8, u8), u8), 1));
15+
println!("{}", offset_of!(((u8, (u8, u8)), (u8, u8, u8)), 0.1.0));
16+
17+
// Complex case: do all combinations of spacings because the spacing determines what gets
18+
// sent to the parser.
19+
println!("{}", offset_of!(ComplexTup, 0.1.1.1));
20+
println!("{}", builtin # offset_of(ComplexTup, 0. 1.1.1));
21+
println!("{}", offset_of!(ComplexTup, 0 . 1.1.1));
22+
println!("{}", offset_of!(ComplexTup, 0 .1.1.1));
23+
println!("{}", offset_of!(ComplexTup, 0.1 .1.1));
24+
println!("{}", offset_of!(ComplexTup, 0.1 . 1.1));
25+
println!("{}", offset_of!(ComplexTup, 0.1. 1.1));
26+
println!("{}", builtin # offset_of(ComplexTup, 0.1.1. 1));
27+
println!("{}", offset_of!(ComplexTup, 0.1.1 . 1));
28+
println!("{}", offset_of!(ComplexTup, 0.1.1 .1));
29+
30+
println!("{}", offset_of!(((u8, u16), (u32, u16, u8)), 0.0));
31+
println!("{}", offset_of!(((u8, u16), (u32, u16, u8)), 1.2));
32+
}

tests/ui/offset-of/offset-of-tuple.rs

+48-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,54 @@
11
#![feature(offset_of)]
22
#![feature(builtin_syntax)]
33

4+
use std::mem::offset_of;
5+
46
fn main() {
5-
core::mem::offset_of!((u8, u8), _0); //~ ERROR no field `_0`
6-
core::mem::offset_of!((u8, u8), +1); //~ ERROR no rules expected
7-
core::mem::offset_of!((u8, u8), -1); //~ ERROR no rules expected
7+
offset_of!((u8, u8), _0); //~ ERROR no field `_0`
8+
offset_of!((u8, u8), 01); //~ ERROR no field `01`
9+
offset_of!((u8, u8), 1e2); //~ ERROR no field `1e2`
10+
offset_of!((u8, u8), 1_u8); //~ ERROR no field `1_`
11+
//~| ERROR suffixes on a tuple index
12+
offset_of!((u8, u8), +1); //~ ERROR no rules expected
13+
offset_of!((u8, u8), -1); //~ ERROR no rules expected
14+
offset_of!((u8, u8), 1.); //~ ERROR expected identifier, found `)`
15+
offset_of!((u8, u8), 1 .); //~ ERROR unexpected end of macro
16+
builtin # offset_of((u8, u8), 1e2); //~ ERROR no field `1e2`
817
builtin # offset_of((u8, u8), _0); //~ ERROR no field `_0`
9-
builtin # offset_of((u8, u8), +1); //~ ERROR expected identifier
18+
builtin # offset_of((u8, u8), 01); //~ ERROR no field `01`
19+
builtin # offset_of((u8, u8), 1_u8); //~ ERROR no field `1_`
20+
//~| ERROR suffixes on a tuple index
21+
// We need to put these into curly braces, otherwise only one of the
22+
// errors will be emitted and the others suppressed.
23+
{ builtin # offset_of((u8, u8), +1) }; //~ ERROR expected identifier, found `+`
24+
{ builtin # offset_of((u8, u8), 1.) }; //~ ERROR expected identifier, found `)`
25+
{ builtin # offset_of((u8, u8), 1 .) }; //~ ERROR expected identifier, found `)`
26+
}
27+
28+
type ComplexTup = ((u8, (u8, u8)), u8);
29+
30+
fn nested() {
31+
offset_of!(((u8, u16), (u32, u16, u8)), 0.2); //~ ERROR no field `2`
32+
offset_of!(((u8, u16), (u32, u16, u8)), 1.2);
33+
offset_of!(((u8, u16), (u32, u16, u8)), 1.2.0); //~ ERROR no field `0`
34+
35+
// All combinations of spaces (this sends different tokens to the parser)
36+
offset_of!(ComplexTup, 0.0.1.); //~ ERROR expected identifier
37+
offset_of!(ComplexTup, 0 .0.1.); //~ ERROR unexpected end of macro
38+
offset_of!(ComplexTup, 0 . 0.1.); //~ ERROR unexpected end of macro
39+
offset_of!(ComplexTup, 0. 0.1.); //~ ERROR no rules expected
40+
offset_of!(ComplexTup, 0.0 .1.); //~ ERROR expected identifier, found `)`
41+
offset_of!(ComplexTup, 0.0 . 1.); //~ ERROR expected identifier, found `)`
42+
offset_of!(ComplexTup, 0.0. 1.); //~ ERROR expected identifier, found `)`
43+
44+
// Test for builtin too to ensure that the builtin syntax can also handle these cases
45+
// We need to put these into curly braces, otherwise only one of the
46+
// errors will be emitted and the others suppressed.
47+
{ builtin # offset_of(ComplexTup, 0.0.1.) }; //~ ERROR expected identifier, found `)`
48+
{ builtin # offset_of(ComplexTup, 0 .0.1.) }; //~ ERROR expected identifier, found `)`
49+
{ builtin # offset_of(ComplexTup, 0 . 0.1.) }; //~ ERROR expected identifier, found `)`
50+
{ builtin # offset_of(ComplexTup, 0. 0.1.) }; //~ ERROR expected identifier, found `)`
51+
{ builtin # offset_of(ComplexTup, 0.0 .1.) }; //~ ERROR expected identifier, found `)`
52+
{ builtin # offset_of(ComplexTup, 0.0 . 1.) }; //~ ERROR expected identifier, found `)`
53+
{ builtin # offset_of(ComplexTup, 0.0. 1.) }; //~ ERROR expected identifier, found `)`
1054
}

0 commit comments

Comments
 (0)