Skip to content

Commit dba5625

Browse files
author
Piotr Zolnierek
committed
Remove code duplication
Remove whitespace. Update documentation for to_uppercase, to_lowercase.
1 parent 04170b0 commit dba5625

File tree

3 files changed

+51
-79
lines changed

3 files changed

+51
-79
lines changed

src/etc/unicode.py

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -160,23 +160,22 @@ def ch_prefix(ix):
160160

161161
def emit_bsearch_range_table(f):
162162
f.write("""
163-
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
164-
use cmp::{Equal, Less, Greater};
165-
use vec::ImmutableVector;
166-
use option::None;
167-
r.bsearch(|&(lo,hi)| {
168-
if lo <= c && c <= hi { Equal }
169-
else if hi < c { Less }
170-
else { Greater }
171-
}) != None
172-
}\n\n
163+
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
164+
use cmp::{Equal, Less, Greater};
165+
use vec::ImmutableVector;
166+
use option::None;
167+
r.bsearch(|&(lo,hi)| {
168+
if lo <= c && c <= hi { Equal }
169+
else if hi < c { Less }
170+
else { Greater }
171+
}) != None
172+
}\n\n
173173
""");
174174

175175
def emit_property_module(f, mod, tbl):
176176
f.write("pub mod %s {\n" % mod)
177177
keys = tbl.keys()
178178
keys.sort()
179-
emit_bsearch_range_table(f);
180179

181180
for cat in keys:
182181
if cat not in ["Nd", "Nl", "No", "Cc",
@@ -192,7 +191,7 @@ def emit_property_module(f, mod, tbl):
192191
f.write("\n ];\n\n")
193192

194193
f.write(" pub fn %s(c: char) -> bool {\n" % cat)
195-
f.write(" bsearch_range_table(c, %s_table)\n" % cat)
194+
f.write(" super::bsearch_range_table(c, %s_table)\n" % cat)
196195
f.write(" }\n\n")
197196
f.write("}\n")
198197

@@ -203,7 +202,7 @@ def emit_conversions_module(f, lowerupper, upperlower):
203202
use cmp::{Equal, Less, Greater};
204203
use vec::ImmutableVector;
205204
use tuple::Tuple2;
206-
use option::{ Option, Some, None };
205+
use option::{Option, Some, None};
207206
208207
pub fn to_lower(c: char) -> char {
209208
match bsearch_case_table(c, LuLl_table) {
@@ -227,23 +226,15 @@ def emit_conversions_module(f, lowerupper, upperlower):
227226
})
228227
}
229228
""");
230-
emit_caseconversions(f, lowerupper, upperlower)
229+
emit_caseconversion_table(f, "LuLl", upperlower)
230+
emit_caseconversion_table(f, "LlLu", lowerupper)
231231
f.write("}\n")
232232

233-
def emit_caseconversions(f, lowerupper, upperlower):
234-
f.write(" static LuLl_table : &'static [(char, char)] = &[\n")
235-
sorted_by_lu = sorted(upperlower.iteritems(), key=operator.itemgetter(0))
236-
ix = 0
237-
for key, value in sorted_by_lu:
238-
f.write(ch_prefix(ix))
239-
f.write("(%s, %s)" % (escape_char(key), escape_char(value)))
240-
ix += 1
241-
f.write("\n ];\n\n")
242-
243-
f.write(" static LlLu_table : &'static [(char, char)] = &[\n")
244-
sorted_by_ll = sorted(lowerupper.iteritems(), key=operator.itemgetter(0))
233+
def emit_caseconversion_table(f, name, table):
234+
f.write(" static %s_table : &'static [(char, char)] = &[\n" % name)
235+
sorted_table = sorted(table.iteritems(), key=operator.itemgetter(0))
245236
ix = 0
246-
for key, value in sorted_by_ll:
237+
for key, value in sorted_table:
247238
f.write(ch_prefix(ix))
248239
f.write("(%s, %s)" % (escape_char(key), escape_char(value)))
249240
ix += 1
@@ -425,6 +416,7 @@ def emit_decomp_module(f, canon, compat, combine):
425416
426417
''')
427418

419+
emit_bsearch_range_table(rf);
428420
emit_property_module(rf, "general_category", gencats)
429421

430422
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)

src/libstd/char.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,20 +228,26 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
228228
/// Convert a char to its uppercase equivalent
229229
///
230230
/// The case-folding performed is the common or simple mapping:
231-
/// it only maps a codepoint to its equivalent if it is also a single codepoint
231+
/// it maps one Unicode codepoint (one char in Rust) to its uppercase equivalent according
232+
/// to the Unicode database at ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt.
233+
/// The additional SpecialCasing.txt is not considered here, as it expands to multiple
234+
/// codepoints in some cases.
235+
///
236+
/// A full reference can be found here:
237+
/// http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
232238
///
233239
/// # Return value
234240
///
235-
/// Returns the char itself if no conversion if possible
241+
/// Returns the char itself if no conversion was made
236242
#[inline]
237243
pub fn to_uppercase(c: char) -> char {
238244
conversions::to_upper(c)
239245
}
240246

241247
/// Convert a char to its lowercase equivalent
242248
///
243-
/// The case-folding performed is the common or simple mapping:
244-
/// it only maps a codepoint to its equivalent if it is also a single codepoint
249+
/// The case-folding performed is the common or simple mapping;
250+
/// see `to_uppercase` for references and more information.
245251
///
246252
/// # Return value
247253
///

src/libstd/unicode.rs

Lines changed: 22 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,26 @@
1313
#[allow(missing_doc)];
1414
#[allow(non_uppercase_statics)];
1515

16-
pub mod general_category {
1716

18-
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
19-
use cmp::{Equal, Less, Greater};
20-
use vec::ImmutableVector;
21-
use option::None;
22-
r.bsearch(|&(lo,hi)| {
23-
if lo <= c && c <= hi { Equal }
24-
else if hi < c { Less }
25-
else { Greater }
26-
}) != None
27-
}
17+
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
18+
use cmp::{Equal, Less, Greater};
19+
use vec::ImmutableVector;
20+
use option::None;
21+
r.bsearch(|&(lo,hi)| {
22+
if lo <= c && c <= hi { Equal }
23+
else if hi < c { Less }
24+
else { Greater }
25+
}) != None
26+
}
2827

2928

29+
pub mod general_category {
3030
static Cc_table : &'static [(char,char)] = &[
3131
('\x00', '\x1f'), ('\x7f', '\x9f')
3232
];
3333

3434
pub fn Cc(c: char) -> bool {
35-
bsearch_range_table(c, Cc_table)
35+
super::bsearch_range_table(c, Cc_table)
3636
}
3737

3838
static Nd_table : &'static [(char,char)] = &[
@@ -60,7 +60,7 @@ pub mod general_category {
6060
];
6161

6262
pub fn Nd(c: char) -> bool {
63-
bsearch_range_table(c, Nd_table)
63+
super::bsearch_range_table(c, Nd_table)
6464
}
6565

6666
static Nl_table : &'static [(char,char)] = &[
@@ -73,7 +73,7 @@ pub mod general_category {
7373
];
7474

7575
pub fn Nl(c: char) -> bool {
76-
bsearch_range_table(c, Nl_table)
76+
super::bsearch_range_table(c, Nl_table)
7777
}
7878

7979
static No_table : &'static [(char,char)] = &[
@@ -101,7 +101,7 @@ pub mod general_category {
101101
];
102102

103103
pub fn No(c: char) -> bool {
104-
bsearch_range_table(c, No_table)
104+
super::bsearch_range_table(c, No_table)
105105
}
106106

107107
}
@@ -2323,19 +2323,6 @@ pub mod decompose {
23232323
}
23242324

23252325
pub mod derived_property {
2326-
2327-
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
2328-
use cmp::{Equal, Less, Greater};
2329-
use vec::ImmutableVector;
2330-
use option::None;
2331-
r.bsearch(|&(lo,hi)| {
2332-
if lo <= c && c <= hi { Equal }
2333-
else if hi < c { Less }
2334-
else { Greater }
2335-
}) != None
2336-
}
2337-
2338-
23392326
static Alphabetic_table : &'static [(char,char)] = &[
23402327
('\x41', '\x5a'), ('\x61', '\x7a'),
23412328
('\xaa', '\xaa'), ('\xb5', '\xb5'),
@@ -2745,7 +2732,7 @@ pub mod derived_property {
27452732
];
27462733

27472734
pub fn Alphabetic(c: char) -> bool {
2748-
bsearch_range_table(c, Alphabetic_table)
2735+
super::bsearch_range_table(c, Alphabetic_table)
27492736
}
27502737

27512738
static Lowercase_table : &'static [(char,char)] = &[
@@ -3067,7 +3054,7 @@ pub mod derived_property {
30673054
];
30683055

30693056
pub fn Lowercase(c: char) -> bool {
3070-
bsearch_range_table(c, Lowercase_table)
3057+
super::bsearch_range_table(c, Lowercase_table)
30713058
}
30723059

30733060
static Uppercase_table : &'static [(char,char)] = &[
@@ -3379,7 +3366,7 @@ pub mod derived_property {
33793366
];
33803367

33813368
pub fn Uppercase(c: char) -> bool {
3382-
bsearch_range_table(c, Uppercase_table)
3369+
super::bsearch_range_table(c, Uppercase_table)
33833370
}
33843371

33853372
static XID_Continue_table : &'static [(char,char)] = &[
@@ -3863,7 +3850,7 @@ pub mod derived_property {
38633850
];
38643851

38653852
pub fn XID_Continue(c: char) -> bool {
3866-
bsearch_range_table(c, XID_Continue_table)
3853+
super::bsearch_range_table(c, XID_Continue_table)
38673854
}
38683855

38693856
static XID_Start_table : &'static [(char,char)] = &[
@@ -4147,24 +4134,11 @@ pub mod derived_property {
41474134
];
41484135

41494136
pub fn XID_Start(c: char) -> bool {
4150-
bsearch_range_table(c, XID_Start_table)
4137+
super::bsearch_range_table(c, XID_Start_table)
41514138
}
41524139

41534140
}
41544141
pub mod property {
4155-
4156-
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
4157-
use cmp::{Equal, Less, Greater};
4158-
use vec::ImmutableVector;
4159-
use option::None;
4160-
r.bsearch(|&(lo,hi)| {
4161-
if lo <= c && c <= hi { Equal }
4162-
else if hi < c { Less }
4163-
else { Greater }
4164-
}) != None
4165-
}
4166-
4167-
41684142
static White_Space_table : &'static [(char,char)] = &[
41694143
('\x09', '\x0d'), ('\x20', '\x20'),
41704144
('\x85', '\x85'), ('\xa0', '\xa0'),
@@ -4175,7 +4149,7 @@ pub mod property {
41754149
];
41764150

41774151
pub fn White_Space(c: char) -> bool {
4178-
bsearch_range_table(c, White_Space_table)
4152+
super::bsearch_range_table(c, White_Space_table)
41794153
}
41804154

41814155
}
@@ -4184,7 +4158,7 @@ pub mod conversions {
41844158
use cmp::{Equal, Less, Greater};
41854159
use vec::ImmutableVector;
41864160
use tuple::Tuple2;
4187-
use option::{ Option, Some, None };
4161+
use option::{Option, Some, None};
41884162

41894163
pub fn to_lower(c: char) -> char {
41904164
match bsearch_case_table(c, LuLl_table) {

0 commit comments

Comments
 (0)