15
15
#![ allow( non_snake_case) ]
16
16
#![ doc( primitive = "char" ) ]
17
17
18
+ use iter:: Iterator ;
18
19
use mem:: transmute;
19
20
use ops:: FnMut ;
20
- use option:: Option ;
21
21
use option:: Option :: { None , Some } ;
22
- use iter :: { range_step , Iterator , RangeStep } ;
22
+ use option :: Option ;
23
23
use slice:: SliceExt ;
24
24
25
25
// UTF-8 ranges and tags for encoding characters
@@ -156,34 +156,15 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
156
156
}
157
157
}
158
158
159
/// Deprecated, call the escape_unicode method instead.
///
/// Invokes `f` once for every `char` yielded by `c.escape_unicode()`, in
/// the order the iterator produces them.
#[deprecated = "use the Char::escape_unicode method"]
pub fn escape_unicode<F>(c: char, mut f: F) where F: FnMut(char) {
    for ch in c.escape_unicode() {
        f(ch);
    }
}
174
166
175
- ///
176
- /// Returns a 'default' ASCII and C++11-like literal escape of a `char`
177
- ///
178
- /// The default is chosen with a bias toward producing literals that are
179
- /// legal in a variety of languages, including C++11 and similar C-family
180
- /// languages. The exact rules are:
181
- ///
182
- /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
183
- /// - Single-quote, double-quote and backslash chars are backslash-escaped.
184
- /// - Any other chars in the range [0x20,0x7e] are not escaped.
185
- /// - Any other chars are given hex Unicode escapes; see `escape_unicode`.
186
- ///
167
+ /// Deprecated, call the escape_default method instead.
187
168
#[ deprecated = "use the Char::escape_default method" ]
188
169
pub fn escape_default < F > ( c : char , mut f : F ) where F : FnMut ( char ) {
189
170
for c in c. escape_default ( ) {
@@ -267,13 +248,11 @@ pub trait Char {
267
248
/// Returns an iterator that yields the hexadecimal Unicode escape
268
249
/// of a character, as `char`s.
269
250
///
270
- /// The rules are as follows:
271
- ///
272
- /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`
273
- /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\u{NNNN}`.
274
- /// * Characters above 0x10000 get 8-digit escapes: `\\u{{NNN}NNNNN}`.
251
+ /// All characters are escaped with Rust syntax of the form `\\u{NNNN}`
252
+ /// where `NNNN` is the shortest hexadecimal representation of the code
253
+ /// point.
275
254
#[ unstable = "pending error conventions, trait organization" ]
276
- fn escape_unicode ( self ) -> UnicodeEscapedChars ;
255
+ fn escape_unicode ( self ) -> EscapeUnicode ;
277
256
278
257
/// Returns an iterator that yields the 'default' ASCII and
279
258
/// C++11-like literal escape of a character, as `char`s.
@@ -288,7 +267,7 @@ pub trait Char {
288
267
/// * Any other chars in the range [0x20,0x7e] are not escaped.
289
268
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
290
269
#[ unstable = "pending error conventions, trait organization" ]
291
- fn escape_default ( self ) -> DefaultEscapedChars ;
270
+ fn escape_default ( self ) -> EscapeDefault ;
292
271
293
272
/// Returns the amount of bytes this character would need if encoded in
294
273
/// UTF-8.
@@ -358,23 +337,23 @@ impl Char for char {
358
337
fn from_u32(i: u32) -> Option<char> {
    // The bare path names the free function `from_u32`, not this method,
    // so this is a plain delegation rather than a recursive call.
    from_u32(i)
}
359
338
360
339
#[ unstable = "pending error conventions, trait organization" ]
361
- fn escape_unicode ( self ) -> UnicodeEscapedChars {
362
- UnicodeEscapedChars { c : self , state : UnicodeEscapedCharsState :: Backslash }
340
+ fn escape_unicode ( self ) -> EscapeUnicode {
341
+ EscapeUnicode { c : self , state : EscapeUnicodeState :: Backslash }
363
342
}
364
343
365
344
#[ unstable = "pending error conventions, trait organization" ]
366
- fn escape_default ( self ) -> DefaultEscapedChars {
345
+ fn escape_default ( self ) -> EscapeDefault {
367
346
let init_state = match self {
368
- '\t' => DefaultEscapedCharsState :: Backslash ( 't' ) ,
369
- '\r' => DefaultEscapedCharsState :: Backslash ( 'r' ) ,
370
- '\n' => DefaultEscapedCharsState :: Backslash ( 'n' ) ,
371
- '\\' => DefaultEscapedCharsState :: Backslash ( '\\' ) ,
372
- '\'' => DefaultEscapedCharsState :: Backslash ( '\'' ) ,
373
- '"' => DefaultEscapedCharsState :: Backslash ( '"' ) ,
374
- '\x20' ... '\x7e' => DefaultEscapedCharsState :: Char ( self ) ,
375
- _ => DefaultEscapedCharsState :: Unicode ( self . escape_unicode ( ) )
347
+ '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
348
+ '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
349
+ '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
350
+ '\\' => EscapeDefaultState :: Backslash ( '\\' ) ,
351
+ '\'' => EscapeDefaultState :: Backslash ( '\'' ) ,
352
+ '"' => EscapeDefaultState :: Backslash ( '"' ) ,
353
+ '\x20' ... '\x7e' => EscapeDefaultState :: Char ( self ) ,
354
+ _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
376
355
} ;
377
- DefaultEscapedChars { state : init_state }
356
+ EscapeDefault { state : init_state }
378
357
}
379
358
380
359
#[ inline]
@@ -451,72 +430,86 @@ impl Char for char {
451
430
452
431
/// An iterator over the characters that represent a `char`, as escaped by
453
432
/// Rust's unicode escaping rules.
454
- pub struct UnicodeEscapedChars {
433
+ pub struct EscapeUnicode {
455
434
c : char ,
456
- state : UnicodeEscapedCharsState
435
+ state : EscapeUnicodeState
457
436
}
458
437
459
- enum UnicodeEscapedCharsState {
438
+ enum EscapeUnicodeState {
460
439
Backslash ,
461
440
Type ,
462
- Value ( RangeStep < i32 > ) ,
441
+ LeftBrace ,
442
+ Value ( uint ) ,
443
+ RightBrace ,
444
+ Done ,
463
445
}
464
446
465
- impl Iterator < char > for UnicodeEscapedChars {
447
+ impl Iterator < char > for EscapeUnicode {
466
448
fn next ( & mut self ) -> Option < char > {
467
449
match self . state {
468
- UnicodeEscapedCharsState :: Backslash => {
469
- self . state = UnicodeEscapedCharsState :: Type ;
450
+ EscapeUnicodeState :: Backslash => {
451
+ self . state = EscapeUnicodeState :: Type ;
470
452
Some ( '\\' )
471
453
}
472
- UnicodeEscapedCharsState :: Type => {
473
- let ( typechar, pad) = if self . c <= '\x7f' { ( 'x' , 2 ) }
474
- else if self . c <= '\u{ffff}' { ( 'u' , 4 ) }
475
- else { ( 'U' , 8 ) } ;
476
- self . state = UnicodeEscapedCharsState :: Value ( range_step ( 4 * ( pad - 1 ) , -1 , -4i32 ) ) ;
477
- Some ( typechar)
454
+ EscapeUnicodeState :: Type => {
455
+ self . state = EscapeUnicodeState :: LeftBrace ;
456
+ Some ( 'u' )
478
457
}
479
- UnicodeEscapedCharsState :: Value ( ref mut range_step) => match range_step. next ( ) {
480
- Some ( offset) => {
481
- let offset = offset as uint ;
482
- let v = match ( ( self . c as i32 ) >> offset) & 0xf {
483
- i @ 0 ... 9 => '0' as i32 + i,
484
- i => 'a' as i32 + ( i - 10 )
485
- } ;
486
- Some ( unsafe { transmute ( v) } )
458
+ EscapeUnicodeState :: LeftBrace => {
459
+ let mut n = 0 u;
460
+ while ( self . c as u32 ) >> ( 4 * ( n + 1 ) ) != 0 {
461
+ n += 1 ;
487
462
}
488
- None => None
463
+ self . state = EscapeUnicodeState :: Value ( n) ;
464
+ Some ( '{' )
465
+ }
466
+ EscapeUnicodeState :: Value ( offset) => {
467
+ let v = match ( ( self . c as i32 ) >> ( offset * 4 ) ) & 0xf {
468
+ i @ 0 ... 9 => '0' as i32 + i,
469
+ i => 'a' as i32 + ( i - 10 )
470
+ } ;
471
+ if offset == 0 {
472
+ self . state = EscapeUnicodeState :: RightBrace ;
473
+ } else {
474
+ self . state = EscapeUnicodeState :: Value ( offset - 1 ) ;
475
+ }
476
+ Some ( unsafe { transmute ( v) } )
477
+ }
478
+ EscapeUnicodeState :: RightBrace => {
479
+ self . state = EscapeUnicodeState :: Done ;
480
+ Some ( '}' )
489
481
}
482
+ EscapeUnicodeState :: Done => None ,
490
483
}
491
484
}
492
485
}
493
486
494
487
/// An iterator over the characters that represent a `char`, escaped
495
488
/// for maximum portability.
496
- pub struct DefaultEscapedChars {
497
- state : DefaultEscapedCharsState
489
+ pub struct EscapeDefault {
490
+ state : EscapeDefaultState
498
491
}
499
492
500
- enum DefaultEscapedCharsState {
493
+ enum EscapeDefaultState {
501
494
Backslash ( char ) ,
502
495
Char ( char ) ,
503
496
Done ,
504
- Unicode ( UnicodeEscapedChars ) ,
497
+ Unicode ( EscapeUnicode ) ,
505
498
}
506
499
507
- impl Iterator < char > for DefaultEscapedChars {
500
+ impl Iterator < char > for EscapeDefault {
508
501
fn next ( & mut self ) -> Option < char > {
509
502
match self . state {
510
- DefaultEscapedCharsState :: Backslash ( c) => {
511
- self . state = DefaultEscapedCharsState :: Char ( c) ;
503
+ EscapeDefaultState :: Backslash ( c) => {
504
+ self . state = EscapeDefaultState :: Char ( c) ;
512
505
Some ( '\\' )
513
506
}
514
- DefaultEscapedCharsState :: Char ( c) => {
515
- self . state = DefaultEscapedCharsState :: Done ;
507
+ EscapeDefaultState :: Char ( c) => {
508
+ self . state = EscapeDefaultState :: Done ;
516
509
Some ( c)
517
510
}
518
- DefaultEscapedCharsState :: Done => None ,
519
- DefaultEscapedCharsState :: Unicode ( ref mut iter) => iter. next ( )
511
+ EscapeDefaultState :: Done => None ,
512
+ EscapeDefaultState :: Unicode ( ref mut iter) => iter. next ( )
520
513
}
521
514
}
522
515
}
0 commit comments