13
13
use slice;
14
14
use str:: from_utf8_unchecked_mut;
15
15
use super :: * ;
16
- use super :: CharExt as C ;
17
16
use super :: printable:: is_printable;
18
17
use unicode:: tables:: { conversions, derived_property, general_category, property} ;
19
18
20
- #[ stable( feature = "core" , since = "1.6.0" ) ]
21
- impl CharExt for char {
22
- #[ inline]
23
- fn is_digit ( self , radix : u32 ) -> bool {
24
- self . to_digit ( radix) . is_some ( )
25
- }
26
-
27
- #[ inline]
28
- fn to_digit ( self , radix : u32 ) -> Option < u32 > {
29
- if radix > 36 {
30
- panic ! ( "to_digit: radix is too high (maximum 36)" ) ;
31
- }
32
- let val = match self {
33
- '0' ... '9' => self as u32 - '0' as u32 ,
34
- 'a' ... 'z' => self as u32 - 'a' as u32 + 10 ,
35
- 'A' ... 'Z' => self as u32 - 'A' as u32 + 10 ,
36
- _ => return None ,
37
- } ;
38
- if val < radix { Some ( val) }
39
- else { None }
40
- }
41
-
42
- #[ inline]
43
- fn escape_unicode ( self ) -> EscapeUnicode {
44
- let c = self as u32 ;
45
-
46
- // or-ing 1 ensures that for c==0 the code computes that one
47
- // digit should be printed and (which is the same) avoids the
48
- // (31 - 32) underflow
49
- let msb = 31 - ( c | 1 ) . leading_zeros ( ) ;
50
-
51
- // the index of the most significant hex digit
52
- let ms_hex_digit = msb / 4 ;
53
- EscapeUnicode {
54
- c : self ,
55
- state : EscapeUnicodeState :: Backslash ,
56
- hex_digit_idx : ms_hex_digit as usize ,
57
- }
58
- }
59
-
60
- #[ inline]
61
- fn escape_default ( self ) -> EscapeDefault {
62
- let init_state = match self {
63
- '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
64
- '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
65
- '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
66
- '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
67
- '\x20' ... '\x7e' => EscapeDefaultState :: Char ( self ) ,
68
- _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
69
- } ;
70
- EscapeDefault { state : init_state }
71
- }
72
-
73
- #[ inline]
74
- fn escape_debug ( self ) -> EscapeDebug {
75
- let init_state = match self {
76
- '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
77
- '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
78
- '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
79
- '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
80
- c if is_printable ( c) => EscapeDefaultState :: Char ( c) ,
81
- c => EscapeDefaultState :: Unicode ( c. escape_unicode ( ) ) ,
82
- } ;
83
- EscapeDebug ( EscapeDefault { state : init_state } )
84
- }
85
-
86
- #[ inline]
87
- fn len_utf8 ( self ) -> usize {
88
- let code = self as u32 ;
89
- if code < MAX_ONE_B {
90
- 1
91
- } else if code < MAX_TWO_B {
92
- 2
93
- } else if code < MAX_THREE_B {
94
- 3
95
- } else {
96
- 4
97
- }
98
- }
99
-
100
- #[ inline]
101
- fn len_utf16 ( self ) -> usize {
102
- let ch = self as u32 ;
103
- if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
104
- }
105
-
106
- #[ inline]
107
- fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
108
- let code = self as u32 ;
109
- unsafe {
110
- let len =
111
- if code < MAX_ONE_B && !dst. is_empty ( ) {
112
- * dst. get_unchecked_mut ( 0 ) = code as u8 ;
113
- 1
114
- } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
115
- * dst. get_unchecked_mut ( 0 ) = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
116
- * dst. get_unchecked_mut ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
117
- 2
118
- } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
119
- * dst. get_unchecked_mut ( 0 ) = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
120
- * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
121
- * dst. get_unchecked_mut ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
122
- 3
123
- } else if dst. len ( ) >= 4 {
124
- * dst. get_unchecked_mut ( 0 ) = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
125
- * dst. get_unchecked_mut ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
126
- * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
127
- * dst. get_unchecked_mut ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
128
- 4
129
- } else {
130
- panic ! ( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
131
- from_u32_unchecked( code) . len_utf8( ) ,
132
- code,
133
- dst. len( ) )
134
- } ;
135
- from_utf8_unchecked_mut ( dst. get_unchecked_mut ( ..len) )
136
- }
137
- }
138
-
139
- #[ inline]
140
- fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
141
- let mut code = self as u32 ;
142
- unsafe {
143
- if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
144
- // The BMP falls through (assuming non-surrogate, as it should)
145
- * dst. get_unchecked_mut ( 0 ) = code as u16 ;
146
- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
147
- } else if dst. len ( ) >= 2 {
148
- // Supplementary planes break into surrogates.
149
- code -= 0x1_0000 ;
150
- * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
151
- * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
152
- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
153
- } else {
154
- panic ! ( "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
155
- from_u32_unchecked( code) . len_utf16( ) ,
156
- code,
157
- dst. len( ) )
158
- }
159
- }
160
- }
161
- }
162
-
163
19
#[ lang = "char" ]
164
20
impl char {
165
21
/// Checks if a `char` is a digit in the given radix.
@@ -211,7 +67,7 @@ impl char {
211
67
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
212
68
#[ inline]
213
69
pub fn is_digit ( self , radix : u32 ) -> bool {
214
- C :: is_digit ( self , radix)
70
+ self . to_digit ( radix) . is_some ( )
215
71
}
216
72
217
73
/// Converts a `char` to a digit in the given radix.
@@ -265,7 +121,17 @@ impl char {
265
121
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
266
122
#[ inline]
267
123
pub fn to_digit ( self , radix : u32 ) -> Option < u32 > {
268
- C :: to_digit ( self , radix)
124
+ if radix > 36 {
125
+ panic ! ( "to_digit: radix is too high (maximum 36)" ) ;
126
+ }
127
+ let val = match self {
128
+ '0' ... '9' => self as u32 - '0' as u32 ,
129
+ 'a' ... 'z' => self as u32 - 'a' as u32 + 10 ,
130
+ 'A' ... 'Z' => self as u32 - 'A' as u32 + 10 ,
131
+ _ => return None ,
132
+ } ;
133
+ if val < radix { Some ( val) }
134
+ else { None }
269
135
}
270
136
271
137
/// Returns an iterator that yields the hexadecimal Unicode escape of a
@@ -305,7 +171,20 @@ impl char {
305
171
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
306
172
#[ inline]
307
173
pub fn escape_unicode ( self ) -> EscapeUnicode {
308
- C :: escape_unicode ( self )
174
+ let c = self as u32 ;
175
+
176
+ // or-ing 1 ensures that for c==0 the code computes that one
177
+ // digit should be printed and (which is the same) avoids the
178
+ // (31 - 32) underflow
179
+ let msb = 31 - ( c | 1 ) . leading_zeros ( ) ;
180
+
181
+ // the index of the most significant hex digit
182
+ let ms_hex_digit = msb / 4 ;
183
+ EscapeUnicode {
184
+ c : self ,
185
+ state : EscapeUnicodeState :: Backslash ,
186
+ hex_digit_idx : ms_hex_digit as usize ,
187
+ }
309
188
}
310
189
311
190
/// Returns an iterator that yields the literal escape code of a character
@@ -345,7 +224,15 @@ impl char {
345
224
#[ stable( feature = "char_escape_debug" , since = "1.20.0" ) ]
346
225
#[ inline]
347
226
pub fn escape_debug ( self ) -> EscapeDebug {
348
- C :: escape_debug ( self )
227
+ let init_state = match self {
228
+ '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
229
+ '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
230
+ '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
231
+ '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
232
+ c if is_printable ( c) => EscapeDefaultState :: Char ( c) ,
233
+ c => EscapeDefaultState :: Unicode ( c. escape_unicode ( ) ) ,
234
+ } ;
235
+ EscapeDebug ( EscapeDefault { state : init_state } )
349
236
}
350
237
351
238
/// Returns an iterator that yields the literal escape code of a character
@@ -400,7 +287,15 @@ impl char {
400
287
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
401
288
#[ inline]
402
289
pub fn escape_default ( self ) -> EscapeDefault {
403
- C :: escape_default ( self )
290
+ let init_state = match self {
291
+ '\t' => EscapeDefaultState :: Backslash ( 't' ) ,
292
+ '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
293
+ '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
294
+ '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
295
+ '\x20' ... '\x7e' => EscapeDefaultState :: Char ( self ) ,
296
+ _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
297
+ } ;
298
+ EscapeDefault { state : init_state }
404
299
}
405
300
406
301
/// Returns the number of bytes this `char` would need if encoded in UTF-8.
@@ -451,7 +346,16 @@ impl char {
451
346
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
452
347
#[ inline]
453
348
pub fn len_utf8 ( self ) -> usize {
454
- C :: len_utf8 ( self )
349
+ let code = self as u32 ;
350
+ if code < MAX_ONE_B {
351
+ 1
352
+ } else if code < MAX_TWO_B {
353
+ 2
354
+ } else if code < MAX_THREE_B {
355
+ 3
356
+ } else {
357
+ 4
358
+ }
455
359
}
456
360
457
361
/// Returns the number of 16-bit code units this `char` would need if
@@ -476,7 +380,8 @@ impl char {
476
380
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
477
381
#[ inline]
478
382
pub fn len_utf16 ( self ) -> usize {
479
- C :: len_utf16 ( self )
383
+ let ch = self as u32 ;
384
+ if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
480
385
}
481
386
482
387
/// Encodes this character as UTF-8 into the provided byte buffer,
@@ -518,7 +423,35 @@ impl char {
518
423
#[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
519
424
#[ inline]
520
425
pub fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
521
- C :: encode_utf8 ( self , dst)
426
+ let code = self as u32 ;
427
+ unsafe {
428
+ let len =
429
+ if code < MAX_ONE_B && !dst. is_empty ( ) {
430
+ * dst. get_unchecked_mut ( 0 ) = code as u8 ;
431
+ 1
432
+ } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
433
+ * dst. get_unchecked_mut ( 0 ) = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
434
+ * dst. get_unchecked_mut ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
435
+ 2
436
+ } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
437
+ * dst. get_unchecked_mut ( 0 ) = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
438
+ * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
439
+ * dst. get_unchecked_mut ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
440
+ 3
441
+ } else if dst. len ( ) >= 4 {
442
+ * dst. get_unchecked_mut ( 0 ) = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
443
+ * dst. get_unchecked_mut ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
444
+ * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
445
+ * dst. get_unchecked_mut ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
446
+ 4
447
+ } else {
448
+ panic ! ( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
449
+ from_u32_unchecked( code) . len_utf8( ) ,
450
+ code,
451
+ dst. len( ) )
452
+ } ;
453
+ from_utf8_unchecked_mut ( dst. get_unchecked_mut ( ..len) )
454
+ }
522
455
}
523
456
524
457
/// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -558,7 +491,25 @@ impl char {
558
491
#[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
559
492
#[ inline]
560
493
pub fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
561
- C :: encode_utf16 ( self , dst)
494
+ let mut code = self as u32 ;
495
+ unsafe {
496
+ if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
497
+ // The BMP falls through (assuming non-surrogate, as it should)
498
+ * dst. get_unchecked_mut ( 0 ) = code as u16 ;
499
+ slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
500
+ } else if dst. len ( ) >= 2 {
501
+ // Supplementary planes break into surrogates.
502
+ code -= 0x1_0000 ;
503
+ * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
504
+ * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
505
+ slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
506
+ } else {
507
+ panic ! ( "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
508
+ from_u32_unchecked( code) . len_utf16( ) ,
509
+ code,
510
+ dst. len( ) )
511
+ }
512
+ }
562
513
}
563
514
564
515
/// Returns true if this `char` is an alphabetic code point, and false if not.
0 commit comments