@@ -116,9 +116,9 @@ impl char {
116
116
117
117
// the code is split up here to improve execution speed for cases where
118
118
// the `radix` is constant and 10 or smaller
119
- let val = if radix <= 10 {
119
+ let val = if radix <= 10 {
120
120
match self {
121
- '0' ..= '9' => self as u32 - '0' as u32 ,
121
+ '0' ..='9' => self as u32 - '0' as u32 ,
122
122
_ => return None ,
123
123
}
124
124
} else {
@@ -130,8 +130,11 @@ impl char {
130
130
}
131
131
} ;
132
132
133
- if val < radix { Some ( val) }
134
- else { None }
133
+ if val < radix {
134
+ Some ( val)
135
+ } else {
136
+ None
137
+ }
135
138
}
136
139
137
140
/// Returns an iterator that yields the hexadecimal Unicode escape of a
@@ -303,8 +306,8 @@ impl char {
303
306
'\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
304
307
'\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
305
308
'\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
306
- '\x20' ..= '\x7e' => EscapeDefaultState :: Char ( self ) ,
307
- _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
309
+ '\x20' ..='\x7e' => EscapeDefaultState :: Char ( self ) ,
310
+ _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) ) ,
308
311
} ;
309
312
EscapeDefault { state : init_state }
310
313
}
@@ -436,30 +439,31 @@ impl char {
436
439
pub fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
437
440
let code = self as u32 ;
438
441
unsafe {
439
- let len =
440
- if code < MAX_ONE_B && !dst. is_empty ( ) {
442
+ let len = if code < MAX_ONE_B && !dst. is_empty ( ) {
441
443
* dst. get_unchecked_mut ( 0 ) = code as u8 ;
442
444
1
443
445
} else if code < MAX_TWO_B && dst. len ( ) >= 2 {
444
446
* dst. get_unchecked_mut ( 0 ) = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
445
447
* dst. get_unchecked_mut ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
446
448
2
447
- } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
449
+ } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
448
450
* dst. get_unchecked_mut ( 0 ) = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
449
- * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
451
+ * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
450
452
* dst. get_unchecked_mut ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
451
453
3
452
454
} else if dst. len ( ) >= 4 {
453
455
* dst. get_unchecked_mut ( 0 ) = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
454
456
* dst. get_unchecked_mut ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
455
- * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
457
+ * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
456
458
* dst. get_unchecked_mut ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
457
459
4
458
460
} else {
459
- panic ! ( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
461
+ panic ! (
462
+ "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
460
463
from_u32_unchecked( code) . len_utf8( ) ,
461
464
code,
462
- dst. len( ) )
465
+ dst. len( ) ,
466
+ )
463
467
} ;
464
468
from_utf8_unchecked_mut ( dst. get_unchecked_mut ( ..len) )
465
469
}
@@ -515,15 +519,24 @@ impl char {
515
519
* dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
516
520
slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
517
521
} else {
518
- panic ! ( "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
522
+ panic ! (
523
+ "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
519
524
from_u32_unchecked( code) . len_utf16( ) ,
520
525
code,
521
- dst. len( ) )
526
+ dst. len( ) ,
527
+ )
522
528
}
523
529
}
524
530
}
525
531
526
- /// Returns `true` if this `char` is an alphabetic code point, and false if not.
532
+ /// Returns `true` if this `char` has the `Alphabetic` property.
533
+ ///
534
+ /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
535
+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
536
+ ///
537
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
538
+ /// [ucd]: https://www.unicode.org/reports/tr44/
539
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
527
540
///
528
541
/// # Examples
529
542
///
@@ -547,10 +560,14 @@ impl char {
547
560
}
548
561
}
549
562
550
- /// Returns `true` if this `char` is lowercase .
563
+ /// Returns `true` if this `char` has the `Lowercase` property.
551
564
///
552
- /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
553
- /// Property `Lowercase`.
565
+ /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
566
+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
567
+ ///
568
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
569
+ /// [ucd]: https://www.unicode.org/reports/tr44/
570
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
554
571
///
555
572
/// # Examples
556
573
///
@@ -575,10 +592,14 @@ impl char {
575
592
}
576
593
}
577
594
578
- /// Returns `true` if this `char` is uppercase.
595
+ /// Returns `true` if this `char` has the `Uppercase` property.
596
+ ///
597
+ /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
598
+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
579
599
///
580
- /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
581
- /// Property `Uppercase`.
600
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
601
+ /// [ucd]: https://www.unicode.org/reports/tr44/
602
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
582
603
///
583
604
/// # Examples
584
605
///
@@ -603,10 +624,12 @@ impl char {
603
624
}
604
625
}
605
626
606
- /// Returns `true` if this `char` is whitespace .
627
+ /// Returns `true` if this `char` has the `White_Space` property.
607
628
///
608
- /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
609
- /// Property `White_Space`.
629
+ /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
630
+ ///
631
+ /// [ucd]: https://www.unicode.org/reports/tr44/
632
+ /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
610
633
///
611
634
/// # Examples
612
635
///
@@ -630,10 +653,10 @@ impl char {
630
653
}
631
654
}
632
655
633
- /// Returns `true` if this `char` is alphanumeric .
656
+ /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
634
657
///
635
- /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
636
- /// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`.
658
+ /// [`is_alphabetic()`]: #method.is_alphabetic
659
+ /// [`is_numeric()`]: #method.is_numeric
637
660
///
638
661
/// # Examples
639
662
///
@@ -655,10 +678,15 @@ impl char {
655
678
self . is_alphabetic ( ) || self . is_numeric ( )
656
679
}
657
680
658
- /// Returns `true` if this `char` is a control code point.
681
+ /// Returns `true` if this `char` has the general category for control codes.
682
+ ///
683
+ /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
684
+ /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
685
+ /// Database][ucd] [`UnicodeData.txt`].
659
686
///
660
- /// 'Control code point' is defined in terms of the Unicode General
661
- /// Category `Cc`.
687
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
688
+ /// [ucd]: https://www.unicode.org/reports/tr44/
689
+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
662
690
///
663
691
/// # Examples
664
692
///
@@ -675,19 +703,29 @@ impl char {
675
703
general_category:: Cc ( self )
676
704
}
677
705
678
- /// Returns `true` if this `char` is an extended grapheme character .
706
+ /// Returns `true` if this `char` has the `Grapheme_Extend` property.
679
707
///
680
- /// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
681
- /// Category `Grapheme_Extend`.
708
+ /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
709
+ /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
710
+ /// [`DerivedCoreProperties.txt`].
711
+ ///
712
+ /// [uax29]: https://www.unicode.org/reports/tr29/
713
+ /// [ucd]: https://www.unicode.org/reports/tr44/
714
+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
682
715
#[ inline]
683
716
pub ( crate ) fn is_grapheme_extended ( self ) -> bool {
684
717
derived_property:: Grapheme_Extend ( self )
685
718
}
686
719
687
- /// Returns `true` if this `char` is numeric.
720
+ /// Returns `true` if this `char` has one of the general categories for numbers.
721
+ ///
722
+ /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
723
+ /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
724
+ /// Database][ucd] [`UnicodeData.txt`].
688
725
///
689
- /// 'Numeric'-ness is defined in terms of the Unicode General Categories
690
- /// `Nd`, `Nl`, `No`.
726
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
727
+ /// [ucd]: https://www.unicode.org/reports/tr44/
728
+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
691
729
///
692
730
/// # Examples
693
731
///
@@ -713,25 +751,29 @@ impl char {
713
751
}
714
752
}
715
753
716
- /// Returns an iterator that yields the lowercase equivalent of a `char`
717
- /// as one or more `char`s.
754
+ /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
755
+ /// `char`s.
718
756
///
719
- /// If a character does not have a lowercase equivalent, the same character
720
- /// will be returned back by the iterator.
757
+ /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
721
758
///
722
- /// This performs complex unconditional mappings with no tailoring: it maps
723
- /// one Unicode character to its lowercase equivalent according to the
724
- /// [Unicode database] and the additional complex mappings
725
- /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
726
- /// language) are not considered here.
759
+ /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
760
+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
727
761
///
728
- /// For a full reference, see [here][reference].
762
+ /// [ucd]: https://www.unicode.org/reports/tr44/
763
+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
729
764
///
730
- /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
765
+ /// If this `char` requires special considerations (e.g. multiple `char`s), the iterator yields
766
+ /// the `char`(s) given by [`SpecialCasing.txt`].
731
767
///
732
- /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
768
+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
733
769
///
734
- /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
770
+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
771
+ /// is independent of context and language.
772
+ ///
773
+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
774
+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
775
+ ///
776
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
735
777
///
736
778
/// # Examples
737
779
///
@@ -774,25 +816,29 @@ impl char {
774
816
ToLowercase ( CaseMappingIter :: new ( conversions:: to_lower ( self ) ) )
775
817
}
776
818
777
- /// Returns an iterator that yields the uppercase equivalent of a `char`
778
- /// as one or more `char`s.
819
+ /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
820
+ /// `char`s.
821
+ ///
822
+ /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
823
+ ///
824
+ /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
825
+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
779
826
///
780
- /// If a character does not have an uppercase equivalent, the same character
781
- /// will be returned back by the iterator.
827
+ /// [ucd]: https://www.unicode.org/reports/tr44/
828
+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
782
829
///
783
- /// This performs complex unconditional mappings with no tailoring: it maps
784
- /// one Unicode character to its uppercase equivalent according to the
785
- /// [Unicode database] and the additional complex mappings
786
- /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
787
- /// language) are not considered here.
830
+ /// If this `char` requires special considerations (e.g. multiple `char`s), the iterator yields
831
+ /// the `char`(s) given by [`SpecialCasing.txt`].
788
832
///
789
- /// For a full reference, see [here][reference].
833
+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
790
834
///
791
- /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
835
+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
836
+ /// is independent of context and language.
792
837
///
793
- /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
838
+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
839
+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
794
840
///
795
- /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
841
+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
796
842
///
797
843
/// # Examples
798
844
///
0 commit comments