1
1
//! Utilities for validating string and char literals and turning them into
2
2
//! the values they represent.
3
3
4
+ use std:: iter:: { Peekable , from_fn} ;
4
5
use std:: ops:: Range ;
5
- use std:: str:: Chars ;
6
+ use std:: str:: CharIndices ;
6
7
7
8
use Mode :: * ;
8
9
@@ -231,7 +232,7 @@ impl Mode {
231
232
}
232
233
233
234
fn scan_escape < T : From < char > + From < u8 > > (
234
- chars : & mut Chars < ' _ > ,
235
+ chars : & mut impl Iterator < Item = char > ,
235
236
mode : Mode ,
236
237
) -> Result < T , EscapeError > {
237
238
// Previous character was '\\', unescape what follows.
@@ -268,7 +269,10 @@ fn scan_escape<T: From<char> + From<u8>>(
268
269
Ok ( T :: from ( res) )
269
270
}
270
271
271
- fn scan_unicode ( chars : & mut Chars < ' _ > , allow_unicode_escapes : bool ) -> Result < char , EscapeError > {
272
+ fn scan_unicode (
273
+ chars : & mut impl Iterator < Item = char > ,
274
+ allow_unicode_escapes : bool ,
275
+ ) -> Result < char , EscapeError > {
272
276
// We've parsed '\u', now we have to parse '{..}'.
273
277
274
278
if chars. next ( ) != Some ( '{' ) {
@@ -326,7 +330,10 @@ fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError>
326
330
if allow_unicode_chars || c. is_ascii ( ) { Ok ( c) } else { Err ( EscapeError :: NonAsciiCharInByte ) }
327
331
}
328
332
329
- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
333
+ fn unescape_char_or_byte (
334
+ chars : & mut impl Iterator < Item = char > ,
335
+ mode : Mode ,
336
+ ) -> Result < char , EscapeError > {
330
337
let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
331
338
let res = match c {
332
339
'\\' => scan_escape ( chars, mode) ,
@@ -346,63 +353,52 @@ fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, c
346
353
where
347
354
F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
348
355
{
349
- let mut chars = src. chars ( ) ;
350
356
let allow_unicode_chars = mode. allow_unicode_chars ( ) ; // get this outside the loop
351
357
352
- // The `start` and `end` computation here is complicated because
353
- // `skip_ascii_whitespace` makes us to skip over chars without counting
354
- // them in the range computation.
355
- while let Some ( c) = chars. next ( ) {
356
- let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
358
+ let mut chars = src. char_indices ( ) . peekable ( ) ;
359
+ while let Some ( ( start, c) ) = chars. next ( ) {
357
360
let res = match c {
358
- '\\' => {
359
- match chars. clone ( ) . next ( ) {
360
- Some ( '\n' ) => {
361
- // Rust language specification requires us to skip whitespaces
362
- // if unescaped '\' character is followed by '\n'.
363
- // For details see [Rust language reference]
364
- // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
365
- skip_ascii_whitespace ( & mut chars, start, & mut |range, err| {
366
- callback ( range, Err ( err) )
367
- } ) ;
368
- continue ;
369
- }
370
- _ => scan_escape :: < T > ( & mut chars, mode) ,
371
- }
361
+ // skip whitespace for backslash newline, see [Rust language reference]
362
+ // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
363
+ '\\' if chars. next_if ( |& ( _, c) | c == '\n' ) . is_some ( ) => {
364
+ let mut callback_err = |range, err| callback ( range, Err ( err) ) ;
365
+ skip_ascii_whitespace ( & mut chars, start, & mut callback_err) ;
366
+ continue ;
372
367
}
368
+ '\\' => scan_escape :: < T > ( & mut from_fn ( || chars. next ( ) . map ( |i| i. 1 ) ) , mode) ,
373
369
'"' => Err ( EscapeError :: EscapeOnlyChar ) ,
374
370
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
375
371
_ => ascii_check ( c, allow_unicode_chars) . map ( T :: from) ,
376
372
} ;
377
- let end = src . len ( ) - chars . as_str ( ) . len ( ) ;
373
+ let end = chars . peek ( ) . map ( | & ( end , _ ) | end ) . unwrap_or ( src . len ( ) ) ;
378
374
callback ( start..end, res) ;
379
375
}
380
376
}
381
377
382
- fn skip_ascii_whitespace < F > ( chars : & mut Chars < ' _ > , start : usize , callback : & mut F )
378
+ /// Skip ASCII whitespace, except for the formfeed character
379
+ /// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
380
+ /// Warns on unescaped newline and following non-ASCII whitespace.
381
+ fn skip_ascii_whitespace < F > ( chars : & mut Peekable < CharIndices < ' _ > > , start : usize , callback : & mut F )
383
382
where
384
383
F : FnMut ( Range < usize > , EscapeError ) ,
385
384
{
386
- let tail = chars. as_str ( ) ;
387
- let first_non_space = tail
388
- . bytes ( )
389
- . position ( |b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r' )
390
- . unwrap_or ( tail. len ( ) ) ;
391
- if tail[ 1 ..first_non_space] . contains ( '\n' ) {
392
- // The +1 accounts for the escaping slash.
393
- let end = start + first_non_space + 1 ;
385
+ // the escaping slash and newline characters add 2 bytes
386
+ let mut end = start + 2 ;
387
+ let mut contains_nl = false ;
388
+ while let Some ( ( _, c) ) = chars. next_if ( |& ( _, c) | c. is_ascii_whitespace ( ) && c != '\x0c' ) {
389
+ end += 1 ;
390
+ contains_nl = contains_nl || c == '\n' ;
391
+ }
392
+
393
+ if contains_nl {
394
394
callback ( start..end, EscapeError :: MultipleSkippedLinesWarning ) ;
395
395
}
396
- let tail = & tail[ first_non_space..] ;
397
- if let Some ( c) = tail. chars ( ) . next ( ) {
396
+ if let Some ( ( _, c) ) = chars. peek ( ) {
398
397
if c. is_whitespace ( ) {
399
- // For error reporting, we would like the span to contain the character that was not
400
- // skipped. The +1 is necessary to account for the leading \ that started the escape.
401
- let end = start + first_non_space + c. len_utf8 ( ) + 1 ;
402
- callback ( start..end, EscapeError :: UnskippedWhitespaceWarning ) ;
398
+ // for error reporting, include the character that was not skipped in the span
399
+ callback ( start..end + c. len_utf8 ( ) , EscapeError :: UnskippedWhitespaceWarning ) ;
403
400
}
404
401
}
405
- * chars = tail. chars ( ) ;
406
402
}
407
403
408
404
/// Takes the contents of a string literal (without quotes) and produces a
0 commit comments