19
19
# programs". It is not meant to be a complete implementation of unicode.
20
20
# For that we recommend you use a proper binding to libicu.
21
21
22
- import fileinput , re , os , sys
22
+ import fileinput , re , os , sys , operator
23
23
24
24
25
25
def fetch (f ):
@@ -35,6 +35,8 @@ def fetch(f):
35
35
def load_unicode_data (f ):
36
36
fetch (f )
37
37
gencats = {}
38
+ upperlower = {}
39
+ lowerupper = {}
38
40
combines = []
39
41
canon_decomp = {}
40
42
compat_decomp = {}
@@ -44,6 +46,7 @@ def load_unicode_data(f):
44
46
c_hi = 0
45
47
com_lo = 0
46
48
com_hi = 0
49
+
47
50
for line in fileinput .input (f ):
48
51
fields = line .split (";" )
49
52
if len (fields ) != 15 :
@@ -52,7 +55,17 @@ def load_unicode_data(f):
52
55
decomp , deci , digit , num , mirror ,
53
56
old , iso , upcase , lowcase , titlecase ] = fields
54
57
55
- code = int (code , 16 )
58
+ code_org = code
59
+ code = int (code , 16 )
60
+
61
+ # generate char to char direct common and simple conversions
62
+ # uppercase to lowercase
63
+ if gencat == "Lu" and lowcase != "" and code_org != lowcase :
64
+ upperlower [code ] = int (lowcase , 16 )
65
+
66
+ # lowercase to uppercase
67
+ if gencat == "Ll" and upcase != "" and code_org != upcase :
68
+ lowerupper [code ] = int (upcase , 16 )
56
69
57
70
if decomp != "" :
58
71
if decomp .startswith ('<' ):
@@ -96,7 +109,7 @@ def load_unicode_data(f):
96
109
com_lo = code
97
110
com_hi = code
98
111
99
- return (canon_decomp , compat_decomp , gencats , combines )
112
+ return (canon_decomp , compat_decomp , gencats , combines , lowerupper , upperlower )
100
113
101
114
def load_properties (f , interestingprops ):
102
115
fetch (f )
@@ -164,11 +177,12 @@ def emit_property_module(f, mod, tbl):
164
177
keys = tbl .keys ()
165
178
keys .sort ()
166
179
emit_bsearch_range_table (f );
180
+
167
181
for cat in keys :
168
182
if cat not in ["Nd" , "Nl" , "No" , "Cc" ,
169
- "XID_Start" , "XID_Continue" , "Alphabetic" ,
170
- "Lowercase" , "Uppercase" , "White_Space" ]:
171
- continue
183
+ "XID_Start" , "XID_Continue" , "Alphabetic" ,
184
+ "Lowercase" , "Uppercase" , "White_Space" ]:
185
+ continue
172
186
f .write (" static %s_table : &'static [(char,char)] = &[\n " % cat )
173
187
ix = 0
174
188
for pair in tbl [cat ]:
@@ -183,30 +197,58 @@ def emit_property_module(f, mod, tbl):
183
197
f .write ("}\n " )
184
198
185
199
186
- def emit_property_module_old (f , mod , tbl ):
187
- f .write ("mod %s {\n " % mod )
188
- keys = tbl .keys ()
189
- keys .sort ()
190
- for cat in keys :
191
- f .write (" fn %s(c: char) -> bool {\n " % cat )
192
- f .write (" ret alt c {\n " )
193
- prefix = ' '
194
- for pair in tbl [cat ]:
195
- if pair [0 ] == pair [1 ]:
196
- f .write (" %c %s\n " %
197
- (prefix , escape_char (pair [0 ])))
198
- else :
199
- f .write (" %c %s to %s\n " %
200
- (prefix ,
201
- escape_char (pair [0 ]),
202
- escape_char (pair [1 ])))
203
- prefix = '|'
204
- f .write (" { true }\n " )
205
- f .write (" _ { false }\n " )
206
- f .write (" };\n " )
207
- f .write (" }\n \n " )
200
def emit_conversions_module(f, lowerupper, upperlower):
    """Write the Rust ``conversions`` module to ``f``.

    The output consists of, in order: the ``pub mod conversions {`` opener,
    a fixed block of Rust source (imports, ``to_lower``, ``to_upper`` and the
    ``bsearch_case_table`` helper they share), the two case tables produced
    by ``emit_caseconversions``, and the closing brace.

    f          -- writable file-like object receiving the generated Rust
    lowerupper -- mapping passed through to ``emit_caseconversions``
    upperlower -- mapping passed through to ``emit_caseconversions``
    """
    # Fixed Rust text; it refers to LuLl_table / LlLu_table, which are
    # written right after it by emit_caseconversions().
    lookup_source = """
use cmp::{Equal, Less, Greater};
use vec::ImmutableVector;
use tuple::Tuple2;
use option::{ Option, Some, None };

pub fn to_lower(c: char) -> char {
match bsearch_case_table(c, LuLl_table) {
None => c,
Some(index) => LuLl_table[index].val1()
}
}

pub fn to_upper(c: char) -> char {
match bsearch_case_table(c, LlLu_table) {
None => c,
Some(index) => LlLu_table[index].val1()
}
}

fn bsearch_case_table(c: char, table: &'static [(char, char)]) -> Option<uint> {
table.bsearch(|&(key, _)| {
if c == key { Equal }
else if key < c { Less }
else { Greater }
})
}
"""
    f.write("pub mod conversions {\n ")
    f.write(lookup_source)
    emit_caseconversions(f, lowerupper, upperlower)
    f.write("}\n ")
209
232
233
def emit_caseconversions(f, lowerupper, upperlower):
    """Write the two simple case-mapping tables to ``f`` as Rust statics.

    ``LuLl_table`` is built from ``upperlower`` and ``LlLu_table`` from
    ``lowerupper``. Each table is a slice of ``(char, char)`` pairs sorted
    by the first element, which is what the generated binary search needs.

    f          -- writable file-like object receiving the generated Rust
    lowerupper -- dict mapping a code point to its uppercase code point
    upperlower -- dict mapping a code point to its lowercase code point
    """
    _emit_case_table(f, "LuLl_table", upperlower)
    _emit_case_table(f, "LlLu_table", lowerupper)


def _emit_case_table(f, name, mapping):
    """Write ``mapping`` to ``f`` as a key-sorted Rust table called ``name``."""
    f.write(" static %s : &'static [(char, char)] = &[\n " % name)
    # items() rather than iteritems(): yields the same pairs and exists on
    # both Python 2 and Python 3.
    ordered = sorted(mapping.items(), key=operator.itemgetter(0))
    for ix, (key, value) in enumerate(ordered):
        # ch_prefix() decides the separator/line break from the entry index.
        f.write(ch_prefix(ix))
        f.write("(%s, %s)" % (escape_char(key), escape_char(value)))
    f.write("\n ];\n \n ")
251
+
210
252
def format_table_content (f , content , indent ):
211
253
line = " " * indent
212
254
first = True
@@ -362,7 +404,8 @@ def emit_decomp_module(f, canon, compat, combine):
362
404
os .remove (i );
363
405
rf = open (r , "w" )
364
406
365
- (canon_decomp , compat_decomp , gencats , combines ) = load_unicode_data ("UnicodeData.txt" )
407
+ (canon_decomp , compat_decomp , gencats ,
408
+ combines , lowerupper , upperlower ) = load_unicode_data ("UnicodeData.txt" )
366
409
367
410
# Preamble
368
411
rf .write ('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
@@ -388,7 +431,9 @@ def emit_decomp_module(f, canon, compat, combine):
388
431
389
432
derived = load_properties ("DerivedCoreProperties.txt" ,
390
433
["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ])
434
+
391
435
emit_property_module (rf , "derived_property" , derived )
392
436
393
437
props = load_properties ("PropList.txt" , ["White_Space" ])
394
438
emit_property_module (rf , "property" , props )
439
+ emit_conversions_module (rf , lowerupper , upperlower )
0 commit comments