Skip to content

Commit 7a5754b

Browse files
committed
Auto merge of #24428 - kwantam:deprecate_unicode_fns, r=alexcrichton
This patch:
1. renames libunicode to librustc_unicode,
2. deprecates several pieces of libunicode (see below), and
3. removes references to deprecated functions from librustc_driver and libsyntax. This may change pretty-printed output from these modules in cases involving wide or combining characters used in filenames, identifiers, etc.

The following functions are marked deprecated:
1. char.width() and str.width(): --> use the unicode-width crate
2. str.graphemes() and str.grapheme_indices(): --> use the unicode-segmentation crate
3. str.nfd_chars(), str.nfkd_chars(), str.nfc_chars(), str.nfkc_chars(), char.compose(), char.decompose_canonical(), char.decompose_compatible(), char.canonical_combining_class(): --> use the unicode-normalization crate
2 parents 77213d1 + 29d1252 commit 7a5754b

File tree

27 files changed

+114
-57
lines changed

27 files changed

+114
-57
lines changed

mk/crates.mk

+6-6
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
TARGET_CRATES := libc std flate arena term \
5353
serialize getopts collections test rand \
5454
log graphviz core rbml alloc \
55-
unicode rustc_bitflags
55+
rustc_unicode rustc_bitflags
5656
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
5757
rustc_trans rustc_back rustc_llvm rustc_privacy rustc_lint \
5858
rustc_data_structures
@@ -62,9 +62,9 @@ TOOLS := compiletest rustdoc rustc rustbook
6262

6363
DEPS_core :=
6464
DEPS_libc := core
65-
DEPS_unicode := core
65+
DEPS_rustc_unicode := core
6666
DEPS_alloc := core libc native:jemalloc
67-
DEPS_std := core libc rand alloc collections unicode \
67+
DEPS_std := core libc rand alloc collections rustc_unicode \
6868
native:rust_builtin native:backtrace native:rustrt_native \
6969
rustc_bitflags
7070
DEPS_graphviz := std
@@ -96,7 +96,7 @@ DEPS_serialize := std log
9696
DEPS_rbml := std log serialize
9797
DEPS_term := std log
9898
DEPS_getopts := std
99-
DEPS_collections := core alloc unicode
99+
DEPS_collections := core alloc rustc_unicode
100100
DEPS_num := std
101101
DEPS_test := std getopts serialize rbml term native:rust_test_helpers
102102
DEPS_rand := core
@@ -117,11 +117,11 @@ ONLY_RLIB_libc := 1
117117
ONLY_RLIB_alloc := 1
118118
ONLY_RLIB_rand := 1
119119
ONLY_RLIB_collections := 1
120-
ONLY_RLIB_unicode := 1
120+
ONLY_RLIB_rustc_unicode := 1
121121
ONLY_RLIB_rustc_bitflags := 1
122122

123123
# Documented-by-default crates
124-
DOC_CRATES := std alloc collections core libc unicode
124+
DOC_CRATES := std alloc collections core libc rustc_unicode
125125

126126
################################################################################
127127
# You should not need to edit below this line

mk/tests.mk

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515

1616
# The names of crates that must be tested
1717

18-
# libcore/libunicode tests are in a separate crate
18+
# libcore/librustc_unicode tests are in a separate crate
1919
DEPS_coretest :=
2020
$(eval $(call RUST_CRATE,coretest))
2121

2222
DEPS_collectionstest :=
2323
$(eval $(call RUST_CRATE,collectionstest))
2424

25-
TEST_TARGET_CRATES = $(filter-out core unicode,$(TARGET_CRATES)) \
25+
TEST_TARGET_CRATES = $(filter-out core rustc_unicode,$(TARGET_CRATES)) \
2626
collectionstest coretest
2727
TEST_DOC_CRATES = $(DOC_CRATES)
2828
TEST_HOST_CRATES = $(filter-out rustc_typeck rustc_borrowck rustc_resolve \

src/etc/unicode.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -518,11 +518,14 @@ def comp_pfun(char):
518518
emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False,
519519
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
520520

521-
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
522-
+ " bsearch_range_value_table(c, combining_class_table)\n"
523-
+ " }\n")
521+
f.write(""" #[deprecated(reason = "use the crates.io `unicode-normalization` lib instead",
522+
since = "1.0.0")]
523+
#[unstable(feature = "unicode",
524+
reason = "this functionality will be moved to crates.io")]
525+
pub fn canonical_combining_class(c: char) -> u8 {
526+
bsearch_range_value_table(c, combining_class_table)
527+
}
524528
525-
f.write("""
526529
}
527530
528531
""")

src/libcollections/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
#[macro_use]
5151
extern crate core;
5252

53-
extern crate unicode;
53+
extern crate rustc_unicode;
5454
extern crate alloc;
5555

5656
#[cfg(test)] #[macro_use] extern crate std;

src/libcollections/str.rs

+35-9
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ use core::result::Result;
5959
use core::str as core_str;
6060
use core::str::pattern::Pattern;
6161
use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
62-
use unicode::str::{UnicodeStr, Utf16Encoder};
62+
use rustc_unicode::str::{UnicodeStr, Utf16Encoder};
6363

6464
use core::convert::AsRef;
6565
use vec_deque::VecDeque;
6666
use borrow::{Borrow, ToOwned};
6767
use string::String;
68-
use unicode;
68+
use rustc_unicode;
6969
use vec::Vec;
7070
use slice::SliceConcatExt;
7171

@@ -78,7 +78,7 @@ pub use core::str::{Matches, RMatches};
7878
pub use core::str::{MatchIndices, RMatchIndices};
7979
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
8080
pub use core::str::{from_utf8_unchecked, ParseBoolError};
81-
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
81+
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
8282
pub use core::str::pattern;
8383

8484
/*
@@ -161,6 +161,9 @@ enum DecompositionType {
161161
/// External iterator for a string decomposition's characters.
162162
///
163163
/// For use with the `std::iter` module.
164+
#[allow(deprecated)]
165+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
166+
since = "1.0.0")]
164167
#[derive(Clone)]
165168
#[unstable(feature = "unicode",
166169
reason = "this functionality may be replaced with a more generic \
@@ -172,6 +175,7 @@ pub struct Decompositions<'a> {
172175
sorted: bool
173176
}
174177

178+
#[allow(deprecated)]
175179
#[stable(feature = "rust1", since = "1.0.0")]
176180
impl<'a> Iterator for Decompositions<'a> {
177181
type Item = char;
@@ -198,7 +202,7 @@ impl<'a> Iterator for Decompositions<'a> {
198202
{
199203
let callback = |d| {
200204
let class =
201-
unicode::char::canonical_combining_class(d);
205+
rustc_unicode::char::canonical_combining_class(d);
202206
if class == 0 && !*sorted {
203207
canonical_sort(buffer);
204208
*sorted = true;
@@ -207,10 +211,10 @@ impl<'a> Iterator for Decompositions<'a> {
207211
};
208212
match self.kind {
209213
Canonical => {
210-
unicode::char::decompose_canonical(ch, callback)
214+
rustc_unicode::char::decompose_canonical(ch, callback)
211215
}
212216
Compatible => {
213-
unicode::char::decompose_compatible(ch, callback)
217+
rustc_unicode::char::decompose_compatible(ch, callback)
214218
}
215219
}
216220
}
@@ -254,6 +258,9 @@ enum RecompositionState {
254258
/// External iterator for a string recomposition's characters.
255259
///
256260
/// For use with the `std::iter` module.
261+
#[allow(deprecated)]
262+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
263+
since = "1.0.0")]
257264
#[derive(Clone)]
258265
#[unstable(feature = "unicode",
259266
reason = "this functionality may be replaced with a more generic \
@@ -266,6 +273,7 @@ pub struct Recompositions<'a> {
266273
last_ccc: Option<u8>
267274
}
268275

276+
#[allow(deprecated)]
269277
#[stable(feature = "rust1", since = "1.0.0")]
270278
impl<'a> Iterator for Recompositions<'a> {
271279
type Item = char;
@@ -276,7 +284,7 @@ impl<'a> Iterator for Recompositions<'a> {
276284
match self.state {
277285
Composing => {
278286
for ch in self.iter.by_ref() {
279-
let ch_class = unicode::char::canonical_combining_class(ch);
287+
let ch_class = rustc_unicode::char::canonical_combining_class(ch);
280288
if self.composee.is_none() {
281289
if ch_class != 0 {
282290
return Some(ch);
@@ -288,7 +296,7 @@ impl<'a> Iterator for Recompositions<'a> {
288296

289297
match self.last_ccc {
290298
None => {
291-
match unicode::char::compose(k, ch) {
299+
match rustc_unicode::char::compose(k, ch) {
292300
Some(r) => {
293301
self.composee = Some(r);
294302
continue;
@@ -316,7 +324,7 @@ impl<'a> Iterator for Recompositions<'a> {
316324
self.last_ccc = Some(ch_class);
317325
continue;
318326
}
319-
match unicode::char::compose(k, ch) {
327+
match rustc_unicode::char::compose(k, ch) {
320328
Some(r) => {
321329
self.composee = Some(r);
322330
continue;
@@ -465,6 +473,9 @@ impl str {
465473

466474
/// Returns an iterator over the string in Unicode Normalization Form D
467475
/// (canonical decomposition).
476+
#[allow(deprecated)]
477+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
478+
since = "1.0.0")]
468479
#[inline]
469480
#[unstable(feature = "unicode",
470481
reason = "this functionality may be replaced with a more generic \
@@ -480,6 +491,9 @@ impl str {
480491

481492
/// Returns an iterator over the string in Unicode Normalization Form KD
482493
/// (compatibility decomposition).
494+
#[allow(deprecated)]
495+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
496+
since = "1.0.0")]
483497
#[inline]
484498
#[unstable(feature = "unicode",
485499
reason = "this functionality may be replaced with a more generic \
@@ -495,6 +509,9 @@ impl str {
495509

496510
/// An Iterator over the string in Unicode Normalization Form C
497511
/// (canonical decomposition followed by canonical composition).
512+
#[allow(deprecated)]
513+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
514+
since = "1.0.0")]
498515
#[inline]
499516
#[unstable(feature = "unicode",
500517
reason = "this functionality may be replaced with a more generic \
@@ -511,6 +528,9 @@ impl str {
511528

512529
/// An Iterator over the string in Unicode Normalization Form KC
513530
/// (compatibility decomposition followed by canonical composition).
531+
#[allow(deprecated)]
532+
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
533+
since = "1.0.0")]
514534
#[inline]
515535
#[unstable(feature = "unicode",
516536
reason = "this functionality may be replaced with a more generic \
@@ -1690,6 +1710,8 @@ impl str {
16901710
///
16911711
/// assert_eq!(&gr2[..], b);
16921712
/// ```
1713+
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
1714+
since = "1.0.0")]
16931715
#[unstable(feature = "unicode",
16941716
reason = "this functionality may only be provided by libunicode")]
16951717
pub fn graphemes(&self, is_extended: bool) -> Graphemes {
@@ -1709,6 +1731,8 @@ impl str {
17091731
///
17101732
/// assert_eq!(&gr_inds[..], b);
17111733
/// ```
1734+
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
1735+
since = "1.0.0")]
17121736
#[unstable(feature = "unicode",
17131737
reason = "this functionality may only be provided by libunicode")]
17141738
pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
@@ -1749,6 +1773,8 @@ impl str {
17491773
/// recommends that these
17501774
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the
17511775
/// locale is unknown.
1776+
#[deprecated(reason = "use the crates.io `unicode-width` library instead",
1777+
since = "1.0.0")]
17521778
#[unstable(feature = "unicode",
17531779
reason = "this functionality may only be provided by libunicode")]
17541780
pub fn width(&self, is_cjk: bool) -> usize {

src/libcollections/string.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ use core::ops::{self, Deref, Add, Index};
2525
use core::ptr;
2626
use core::slice;
2727
use core::str::pattern::Pattern;
28-
use unicode::str as unicode_str;
29-
use unicode::str::Utf16Item;
28+
use rustc_unicode::str as unicode_str;
29+
use rustc_unicode::str::Utf16Item;
3030

3131
use borrow::{Cow, IntoCow};
3232
use str::{self, FromStr, Utf8Error};

src/libcollectionstest/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
extern crate collections;
2929
extern crate test;
30-
extern crate unicode;
30+
extern crate rustc_unicode;
3131

3232
#[cfg(test)] #[macro_use] mod bench;
3333

src/libcollectionstest/str.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ fn test_le() {
1919
assert!("foo" != "bar");
2020
}
2121

22+
#[allow(deprecated)]
2223
#[test]
2324
fn test_len() {
2425
assert_eq!("".len(), 0);
@@ -498,7 +499,7 @@ fn test_is_utf8() {
498499

499500
#[test]
500501
fn test_is_utf16() {
501-
use unicode::str::is_utf16;
502+
use rustc_unicode::str::is_utf16;
502503

503504
macro_rules! pos {
504505
($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
@@ -944,6 +945,7 @@ fn test_words() {
944945
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
945946
}
946947

948+
#[allow(deprecated)]
947949
#[test]
948950
fn test_nfd_chars() {
949951
macro_rules! t {
@@ -963,6 +965,7 @@ fn test_nfd_chars() {
963965
t!("\u{ac1c}", "\u{1100}\u{1162}");
964966
}
965967

968+
#[allow(deprecated)]
966969
#[test]
967970
fn test_nfkd_chars() {
968971
macro_rules! t {
@@ -982,6 +985,7 @@ fn test_nfkd_chars() {
982985
t!("\u{ac1c}", "\u{1100}\u{1162}");
983986
}
984987

988+
#[allow(deprecated)]
985989
#[test]
986990
fn test_nfc_chars() {
987991
macro_rules! t {
@@ -1002,6 +1006,7 @@ fn test_nfc_chars() {
10021006
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
10031007
}
10041008

1009+
#[allow(deprecated)]
10051010
#[test]
10061011
fn test_nfkc_chars() {
10071012
macro_rules! t {
@@ -1033,6 +1038,7 @@ fn test_lines() {
10331038
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
10341039
}
10351040

1041+
#[allow(deprecated)]
10361042
#[test]
10371043
fn test_graphemes() {
10381044
use std::iter::order;
@@ -1629,7 +1635,7 @@ fn test_rev_split_char_iterator_no_trailing() {
16291635

16301636
#[test]
16311637
fn test_utf16_code_units() {
1632-
use unicode::str::Utf16Encoder;
1638+
use rustc_unicode::str::Utf16Encoder;
16331639
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
16341640
[0xE9, 0xD83D, 0xDCA9])
16351641
}

src/libcollectionstest/string.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ fn test_from_utf16() {
135135
let s_as_utf16 = s.utf16_units().collect::<Vec<u16>>();
136136
let u_as_string = String::from_utf16(&u).unwrap();
137137

138-
assert!(::unicode::str::is_utf16(&u));
138+
assert!(::rustc_unicode::str::is_utf16(&u));
139139
assert_eq!(s_as_utf16, u);
140140

141141
assert_eq!(u_as_string, s);

src/libcore/char.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
//! Character manipulation.
1212
//!
13-
//! For more details, see ::unicode::char (a.k.a. std::char)
13+
//! For more details, see ::rustc_unicode::char (a.k.a. std::char)
1414
1515
#![allow(non_snake_case)]
1616
#![doc(primitive = "char")]

src/libcoretest/char.rs

+1
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ fn test_len_utf16() {
210210
assert!('\u{1f4a9}'.len_utf16() == 2);
211211
}
212212

213+
#[allow(deprecated)]
213214
#[test]
214215
fn test_width() {
215216
assert_eq!('\x00'.width(false),Some(0));

src/libcoretest/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
extern crate core;
3333
extern crate test;
3434
extern crate libc;
35-
extern crate unicode;
35+
extern crate rustc_unicode;
3636

3737
mod any;
3838
mod atomic;

0 commit comments

Comments
 (0)