Skip to content

Commit 1d0b161

Browse files
committed
[EXPERIMENT] Disallow all literal suffixes except the standard numeric ones.
Partly out of curiosity, and partly because this would significantly simplify parts of the lexer and parser.
1 parent d726c84 commit 1d0b161

21 files changed

+301
-329
lines changed

compiler/rustc_ast/src/ast.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1729,9 +1729,9 @@ pub enum LitFloatType {
17291729
Unsuffixed,
17301730
}
17311731

1732-
/// Literal kind.
1733-
///
1734-
/// E.g., `"foo"`, `42`, `12.34`, or `bool`.
1732+
/// Note that the entire literal (including the suffix) is considered when
1733+
/// deciding the `LitKind`. This means that float literals like `1f32` are
1734+
/// classified by this type as `Float`.
17351735
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
17361736
pub enum LitKind {
17371737
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
@@ -1745,8 +1745,8 @@ pub enum LitKind {
17451745
Char(char),
17461746
/// An integer literal (`1`).
17471747
Int(u128, LitIntType),
1748-
/// A float literal (`1f64` or `1E10f64`). Stored as a symbol rather than
1749-
/// `f64` so that `LitKind` can impl `Eq` and `Hash`.
1748+
/// A float literal (`1.0`, `1f64` or `1E10f64`). Stored as a symbol rather
1749+
/// than `f64` so that `LitKind` can impl `Eq` and `Hash`.
17501750
Float(Symbol, LitFloatType),
17511751
/// A boolean literal.
17521752
Bool(bool),

compiler/rustc_ast/src/token.rs

+6-16
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,16 @@ pub enum Delimiter {
5858
Invisible,
5959
}
6060

61+
/// Note that the entire literal (including the suffix) is considered when
62+
/// deciding the `LitKind`. This means that float literals like `1f32` are
63+
/// classified by this type as `Float`.
6164
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
6265
pub enum LitKind {
6366
Bool, // AST only, must never appear in a `Token`
6467
Byte,
6568
Char,
66-
Integer,
67-
Float,
69+
Integer, // e.g. `1`, `1u8`
70+
Float, // e.g. `1.`, `1.0`, `1f32`, `1e3f32`
6871
Str,
6972
StrRaw(u8), // raw string delimited by `n` hash symbols
7073
ByteStr,
@@ -77,7 +80,7 @@ pub enum LitKind {
7780
pub struct Lit {
7881
pub kind: LitKind,
7982
pub symbol: Symbol,
80-
pub suffix: Option<Symbol>,
83+
pub suffix: Option<Symbol>, // njn: change to a type?
8184
}
8285

8386
impl fmt::Display for Lit {
@@ -120,19 +123,6 @@ impl LitKind {
120123
}
121124
}
122125

123-
pub fn descr(self) -> &'static str {
124-
match self {
125-
Bool => panic!("literal token contains `Lit::Bool`"),
126-
Byte => "byte",
127-
Char => "char",
128-
Integer => "integer",
129-
Float => "float",
130-
Str | StrRaw(..) => "string",
131-
ByteStr | ByteStrRaw(..) => "byte string",
132-
Err => "error",
133-
}
134-
}
135-
136126
pub(crate) fn may_have_suffix(self) -> bool {
137127
matches!(self, Integer | Float | Err)
138128
}

compiler/rustc_ast/src/util/literal.rs

+14-24
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,21 @@ use rustc_span::Span;
1010

1111
use std::ascii;
1212

13+
// njn: how much of this will be left?
1314
pub enum LitError {
1415
NotLiteral,
1516
LexerError,
16-
InvalidSuffix,
17-
InvalidIntSuffix,
18-
InvalidFloatSuffix,
19-
NonDecimalFloat(u32),
2017
IntTooLarge,
2118
}
2219

2320
impl LitKind {
2421
/// Converts literal token into a semantic literal.
2522
pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
2623
let token::Lit { kind, symbol, suffix } = lit;
24+
// njn: could even move the suffix into `kind`...
2725
if suffix.is_some() && !kind.may_have_suffix() {
28-
return Err(LitError::InvalidSuffix);
26+
// njn: yuk
27+
return Err(LitError::LexerError);
2928
}
3029

3130
Ok(match kind {
@@ -259,33 +258,23 @@ fn strip_underscores(symbol: Symbol) -> Symbol {
259258
symbol
260259
}
261260

262-
fn filtered_float_lit(
263-
symbol: Symbol,
264-
suffix: Option<Symbol>,
265-
base: u32,
266-
) -> Result<LitKind, LitError> {
267-
debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
268-
if base != 10 {
269-
return Err(LitError::NonDecimalFloat(base));
270-
}
261+
fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
262+
debug!("float_lit: {:?}, {:?}", symbol, suffix);
263+
let symbol = strip_underscores(symbol);
264+
271265
Ok(match suffix {
272266
Some(suf) => LitKind::Float(
273267
symbol,
274268
ast::LitFloatType::Suffixed(match suf {
275269
sym::f32 => ast::FloatTy::F32,
276270
sym::f64 => ast::FloatTy::F64,
277-
_ => return Err(LitError::InvalidFloatSuffix),
271+
_ => return Err(LitError::LexerError),
278272
}),
279273
),
280274
None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
281275
})
282276
}
283277

284-
fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
285-
debug!("float_lit: {:?}, {:?}", symbol, suffix);
286-
filtered_float_lit(strip_underscores(symbol), suffix, 10)
287-
}
288-
289278
fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
290279
debug!("integer_lit: {:?}, {:?}", symbol, suffix);
291280
let symbol = strip_underscores(symbol);
@@ -312,10 +301,11 @@ fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitErr
312301
sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
313302
sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
314303
sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
315-
// `1f64` and `2f32` etc. are valid float literals, and
316-
// `fxxx` looks more like an invalid float literal than invalid integer literal.
317-
_ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
318-
_ => return Err(LitError::InvalidIntSuffix),
304+
_ =>
305+
//return Err(LitError::LexerError), // njn: hmm
306+
{
307+
return Ok(ast::LitKind::Err);
308+
}
319309
},
320310
_ => ast::LitIntType::Unsuffixed,
321311
};

compiler/rustc_ast_lowering/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
962962
let lit = if let ExprKind::Lit(lit) = &expr.kind {
963963
lit.clone()
964964
} else {
965+
// njn: use Lit::from_token_lit here?
965966
Lit {
966967
token_lit: token::Lit::new(token::LitKind::Err, kw::Empty, None),
967968
kind: LitKind::Err,

compiler/rustc_lexer/src/lib.rs

+18-1
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,13 @@ pub enum DocStyle {
165165
Inner,
166166
}
167167

168+
// Note that the suffix is *not* considered when deciding the `LiteralKind` in
169+
// this type. This means that float literals like `1f32` are classified by this
170+
// type as `Int`. (Compare against `rustc_ast::token::LitKind` and
171+
// `rustc_ast::ast::LitKind`.)
168172
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
169173
pub enum LiteralKind {
170-
/// "12_u8", "0o100", "0b120i99"
174+
/// "12_u8", "0o100", "0b120i99", "1f32".
171175
Int { base: Base, empty_int: bool },
172176
/// "12.34f32", "0b100.100"
173177
Float { base: Base, empty_exponent: bool },
@@ -187,6 +191,19 @@ pub enum LiteralKind {
187191
RawByteStr { n_hashes: Option<u8> },
188192
}
189193

194+
impl LiteralKind {
195+
pub fn descr(self) -> &'static str {
196+
match self {
197+
Int { .. } => "integer",
198+
Float { .. } => "float",
199+
Char { .. } => "char",
200+
Byte { .. } => "byte",
201+
Str { .. } | RawStr { .. } => "string",
202+
ByteStr { .. } | RawByteStr { .. } => "byte string",
203+
}
204+
}
205+
}
206+
190207
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
191208
pub enum RawStrError {
192209
/// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`

0 commit comments

Comments
 (0)