rust-lang · kwantam · Apr 14, 2015
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Copyright 2011-2013 The Rust Project Developers. See the COPYRIGHT
+# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
 # file at the top-level directory of this distribution and at
 # http://rust-lang.org/COPYRIGHT.
 #
@@ -13,7 +13,6 @@
 # This script uses the following Unicode tables:
 # - DerivedCoreProperties.txt
 # - DerivedNormalizationProps.txt
-# - EastAsianWidth.txt
 # - auxiliary/GraphemeBreakProperty.txt
 # - PropList.txt
 # - ReadMe.txt
@@ -236,43 +235,6 @@ def load_properties(f, interestingprops):
         props[prop].append((d_lo, d_hi))
     return props
 
-# load all widths of want_widths, except those in except_cats
-def load_east_asian_width(want_widths, except_cats):
-    f = "EastAsianWidth.txt"
-    fetch(f)
-    widths = {}
-    re1 = re.compile("^([0-9A-F]+);(\w+) +# (\w+)")
-    re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)")
-
-    for line in fileinput.input(f):
-        width = None
-        d_lo = 0
-        d_hi = 0
-        cat = None
-        m = re1.match(line)
-        if m:
-            d_lo = m.group(1)
-            d_hi = m.group(1)
-            width = m.group(2)
-            cat = m.group(3)
-        else:
-            m = re2.match(line)
-            if m:
-                d_lo = m.group(1)
-                d_hi = m.group(2)
-                width = m.group(3)
-                cat = m.group(4)
-            else:
-                continue
-        if cat in except_cats or width not in want_widths:
-            continue
-        d_lo = int(d_lo, 16)
-        d_hi = int(d_hi, 16)
-        if width not in widths:
-            widths[width] = []
-        widths[width].append((d_lo, d_hi))
-    return widths
-
 def escape_char(c):
     return "'\\u{%x}'" % c
 
@@ -395,48 +357,6 @@ def emit_grapheme_module(f, grapheme_table, grapheme_cats):
         is_pub=False)
     f.write("}\n")
 
-def emit_charwidth_module(f, width_table):
-    f.write("pub mod charwidth {\n")
-    f.write("    use core::option::Option;\n")
-    f.write("    use core::option::Option::{Some, None};\n")
-    f.write("    use core::slice::SliceExt;\n")
-    f.write("    use core::result::Result::{Ok, Err};\n")
-    f.write("""
-    fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
-        use core::cmp::Ordering::{Equal, Less, Greater};
-        match r.binary_search_by(|&(lo, hi, _, _)| {
-            if lo <= c && c <= hi { Equal }
-            else if hi < c { Less }
-            else { Greater }
-        }) {
-            Ok(idx) => {
-                let (_, _, r_ncjk, r_cjk) = r[idx];
-                if is_cjk { r_cjk } else { r_ncjk }
-            }
-            Err(_) => 1
-        }
-    }
-""")
-
-    f.write("""
-    pub fn width(c: char, is_cjk: bool) -> Option<usize> {
-        match c as usize {
-            _c @ 0 => Some(0),          // null is zero width
-            cu if cu < 0x20 => None,    // control sequences have no width
-            cu if cu < 0x7F => Some(1), // ASCII
-            cu if cu < 0xA0 => None,    // more control sequences
-            _ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize)
-        }
-    }
-
-""")
-
-    f.write("    // character width table. Based on Markus Kuhn's free wcwidth() implementation,\n")
-    f.write("    //     http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n")
-    emit_table(f, "charwidth_table", width_table, "&'static [(char, char, u8, u8)]", is_pub=False,
-            pfun=lambda x: "(%s,%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2], x[3]))
-    f.write("}\n\n")
-
 def emit_norm_module(f, canon, compat, combine, norm_props):
     canon_keys = canon.keys()
     canon_keys.sort()
@@ -527,43 +447,6 @@ def comp_pfun(char):
 
 """)
 
-def remove_from_wtable(wtable, val):
-    wtable_out = []
-    while wtable:
-        if wtable[0][1] < val:
-            wtable_out.append(wtable.pop(0))
-        elif wtable[0][0] > val:
-            break
-        else:
-            (wt_lo, wt_hi, width, width_cjk) = wtable.pop(0)
-            if wt_lo == wt_hi == val:
-                continue
-            elif wt_lo == val:
-                wtable_out.append((wt_lo+1, wt_hi, width, width_cjk))
-            elif wt_hi == val:
-                wtable_out.append((wt_lo, wt_hi-1, width, width_cjk))
-            else:
-                wtable_out.append((wt_lo, val-1, width, width_cjk))
-                wtable_out.append((val+1, wt_hi, width, width_cjk))
-    if wtable:
-        wtable_out.extend(wtable)
-    return wtable_out
-
-
-
-def optimize_width_table(wtable):
-    wtable_out = []
-    w_this = wtable.pop(0)
-    while wtable:
-        if w_this[1] == wtable[0][0] - 1 and w_this[2:3] == wtable[0][2:3]:
-            w_tmp = wtable.pop(0)
-            w_this = (w_this[0], w_tmp[1], w_tmp[2], w_tmp[3])
-        else:
-            wtable_out.append(w_this)
-            w_this = wtable.pop(0)
-    wtable_out.append(w_this)
-    return wtable_out
-
 if __name__ == "__main__":
     r = "tables.rs"
     if os.path.exists(r):
@@ -605,29 +488,6 @@ def optimize_width_table(wtable):
         emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
         emit_conversions_module(rf, lowerupper, upperlower)
 
-        ### character width module
-        width_table = []
-        for zwcat in ["Me", "Mn", "Cf"]:
-            width_table.extend(map(lambda (lo, hi): (lo, hi, 0, 0), gencats[zwcat]))
-        width_table.append((4448, 4607, 0, 0))
-
-        # get widths, except those that are explicitly marked zero-width above
-        ea_widths = load_east_asian_width(["W", "F", "A"], ["Me", "Mn", "Cf"])
-        # these are doublewidth
-        for dwcat in ["W", "F"]:
-            width_table.extend(map(lambda (lo, hi): (lo, hi, 2, 2), ea_widths[dwcat]))
-        width_table.extend(map(lambda (lo, hi): (lo, hi, 1, 2), ea_widths["A"]))
-
-        width_table.sort(key=lambda w: w[0])
-
-        # soft hyphen is not zero width in preformatted text; it's used to indicate
-        # a hyphen inserted to facilitate a linebreak.
-        width_table = remove_from_wtable(width_table, 173)
-
-        # optimize the width table by collapsing adjacent entities when possible
-        width_table = optimize_width_table(width_table)
-        emit_charwidth_module(rf, width_table)
-
         ### grapheme cluster module
         # from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
         grapheme_cats = load_properties("auxiliary/GraphemeBreakProperty.txt", [])

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
@@ -1736,25 +1736,6 @@ impl str {
         UnicodeStr::words(&self[..])
     }
 
-    /// Returns a string's displayed width in columns.
-    ///
-    /// Control characters have zero width.
-    ///
-    /// `is_cjk` determines behavior for characters in the Ambiguous category:
-    /// if `is_cjk` is
-    /// `true`, these are 2 columns wide; otherwise, they are 1.
-    /// In CJK locales, `is_cjk` should be
-    /// `true`, else it should be `false`.
-    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
-    /// recommends that these
-    /// characters be treated as 1 column (i.e., `is_cjk = false`) if the
-    /// locale is unknown.
-    #[unstable(feature = "unicode",
-               reason = "this functionality may only be provided by libunicode")]
-    pub fn width(&self, is_cjk: bool) -> usize {
-        UnicodeStr::width(&self[..], is_cjk)
-    }
-
     /// Returns a `&str` with leading and trailing whitespace removed.
     ///
     /// # Examples

diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs
@@ -37,15 +37,6 @@ fn test_len() {
     assert_eq!("\u{2620}".chars().count(), 1);
     assert_eq!("\u{1d11e}".chars().count(), 1);
     assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19);
-
-    assert_eq!("ｈｅｌｌｏ".width(false), 10);
-    assert_eq!("ｈｅｌｌｏ".width(true), 10);
-    assert_eq!("\0\0\0\0\0".width(false), 0);
-    assert_eq!("\0\0\0\0\0".width(true), 0);
-    assert_eq!("".width(false), 0);
-    assert_eq!("".width(true), 0);
-    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4);
-    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8);
 }
 
 #[test]

diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs
@@ -209,30 +209,3 @@ fn test_len_utf16() {
     assert!('\u{a66e}'.len_utf16() == 1);
     assert!('\u{1f4a9}'.len_utf16() == 2);
 }
-
-#[test]
-fn test_width() {
-    assert_eq!('\x00'.width(false),Some(0));
-    assert_eq!('\x00'.width(true),Some(0));
-
-    assert_eq!('\x0A'.width(false),None);
-    assert_eq!('\x0A'.width(true),None);
-
-    assert_eq!('w'.width(false),Some(1));
-    assert_eq!('w'.width(true),Some(1));
-
-    assert_eq!('ｈ'.width(false),Some(2));
-    assert_eq!('ｈ'.width(true),Some(2));
-
-    assert_eq!('\u{AD}'.width(false),Some(1));
-    assert_eq!('\u{AD}'.width(true),Some(1));
-
-    assert_eq!('\u{1160}'.width(false),Some(0));
-    assert_eq!('\u{1160}'.width(true),Some(0));
-
-    assert_eq!('\u{a1}'.width(false),Some(1));
-    assert_eq!('\u{a1}'.width(true),Some(2));
-
-    assert_eq!('\u{300}'.width(false),Some(0));
-    assert_eq!('\u{300}'.width(true),Some(0));
-}
diff --git a/src/librustc_driver/lib.rs b/src/librustc_driver/lib.rs
@@ -35,7 +35,6 @@
 #![feature(staged_api)]
 #![feature(exit_status)]
 #![feature(set_stdio)]
-#![feature(unicode)]
 
 extern crate arena;
 extern crate flate;
@@ -574,7 +573,7 @@ Available lint options:
     let builtin_groups = sort_lint_groups(builtin_groups);
 
     let max_name_len = plugin.iter().chain(builtin.iter())
-        .map(|&s| s.name.width(true))
+        .map(|&s| s.name.chars().count())
         .max().unwrap_or(0);
     let padded = |x: &str| {
         let mut s = repeat(" ").take(max_name_len - x.chars().count())
@@ -601,7 +600,7 @@ Available lint options:
 
 
     let max_name_len = plugin_groups.iter().chain(builtin_groups.iter())
-        .map(|&(s, _)| s.width(true))
+        .map(|&(s, _)| s.chars().count())
         .max().unwrap_or(0);
     let padded = |x: &str| {
         let mut s = repeat(" ").take(max_name_len - x.chars().count())

diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs
@@ -595,7 +595,7 @@ fn highlight_lines(err: &mut EmitterWriter,
         let mut s = String::new();
         // Skip is the number of characters we need to skip because they are
         // part of the 'filename:line ' part of the previous line.
-        let skip = fm.name.width(false) + digits + 3;
+        let skip = fm.name.chars().count() + digits + 3;
         for _ in 0..skip {
             s.push(' ');
         }
@@ -615,7 +615,7 @@ fn highlight_lines(err: &mut EmitterWriter,
                         col += 8 - col%8;
                         s.push('\t');
                     },
-                    c => for _ in 0..c.width(false).unwrap_or(0) {
+                    _ => {
                         col += 1;
                         s.push(' ');
                     },
@@ -627,7 +627,7 @@ fn highlight_lines(err: &mut EmitterWriter,
             let count = match lastc {
                 // Most terminals have a tab stop every eight columns by default
                 '\t' => 8 - col%8,
-                _ => lastc.width(false).unwrap_or(0),
+                _ => 1,
             };
             col += count;
             s.extend(::std::iter::repeat('~').take(count));
@@ -638,7 +638,7 @@ fn highlight_lines(err: &mut EmitterWriter,
                     if pos >= hi.col.to_usize() { break; }
                     let count = match ch {
                         '\t' => 8 - col%8,
-                        _ => ch.width(false).unwrap_or(0),
+                        _ => 1,
                     };
                     col += count;
                     s.extend(::std::iter::repeat('~').take(count));
@@ -694,7 +694,7 @@ fn end_highlight_lines(w: &mut EmitterWriter,
     }
     let last_line_start = format!("{}:{} ", fm.name, lines[lines.len()-1].line_index + 1);
     let hi = cm.lookup_char_pos(sp.hi);
-    let skip = last_line_start.width(false);
+    let skip = last_line_start.chars().count();
     let mut s = String::new();
     for _ in 0..skip {
         s.push(' ');
@@ -710,9 +710,7 @@ fn end_highlight_lines(w: &mut EmitterWriter,
             // position.
             match ch {
                 '\t' => s.push('\t'),
-                c => for _ in 0..c.width(false).unwrap_or(0) {
-                    s.push(' ');
-                },
+                _ => s.push(' ')
             }
         }
     }

diff --git a/src/libunicode/char.rs b/src/libunicode/char.rs
@@ -31,7 +31,7 @@
 use core::char::CharExt as C;
 use core::option::Option::{self, Some};
 use core::iter::Iterator;
-use tables::{derived_property, property, general_category, conversions, charwidth};
+use tables::{derived_property, property, general_category, conversions};
 
 // stable reexports
 pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};
@@ -435,17 +435,4 @@ impl char {
     pub fn to_uppercase(self) -> ToUppercase {
         ToUppercase(Some(conversions::to_upper(self)))
     }
-
-    /// Returns this character's displayed width in columns, or `None` if it is a
-    /// control character other than `'\x00'`.
-    ///
-    /// `is_cjk` determines behavior for characters in the Ambiguous category:
-    /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
-    /// In CJK contexts, `is_cjk` should be `true`, else it should be `false`.
-    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
-    /// recommends that these characters be treated as 1 column (i.e.,
-    /// `is_cjk` = `false`) if the context cannot be reliably determined.
-    #[unstable(feature = "unicode",
-               reason = "needs expert opinion. is_cjk flag stands out as ugly")]
-    pub fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) }
 }