23
23
# Since this should not require frequent updates, we just store this
24
24
# out-of-line and check the unicode.rs file into git.
25
25
26
- import fileinput , re , os , sys , operator , math
26
+ import re , os , sys , operator , math
27
27
28
28
preamble = '''// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
29
29
// file at the top-level directory of this distribution and at
@@ -69,11 +69,13 @@ def fetch(f):
69
69
sys .stderr .write ("cannot load %s" % f )
70
70
exit (1 )
71
71
72
+ return open (f )
73
+
72
74
def is_surrogate(n):
    """Tell whether ``n`` falls inside the inclusive range described by the
    first two entries of the module-level ``surrogate_codepoints``."""
    lowest = surrogate_codepoints[0]
    highest = surrogate_codepoints[1]
    return lowest <= n <= highest
74
76
75
- def load_unicode_data (f ):
76
- fetch (f )
77
+ def load_unicode_data ():
78
+ uni_data = fetch ("UnicodeData.txt" )
77
79
gencats = {}
78
80
to_lower = {}
79
81
to_upper = {}
@@ -84,7 +86,7 @@ def load_unicode_data(f):
84
86
85
87
udict = {}
86
88
range_start = - 1
87
- for line in fileinput . input ( f ) :
89
+ for line in uni_data :
88
90
data = line .split (';' )
89
91
if len (data ) != 15 :
90
92
continue
@@ -154,9 +156,9 @@ def load_unicode_data(f):
154
156
155
157
return (canon_decomp , compat_decomp , gencats , combines , to_upper , to_lower , to_title )
156
158
157
- def load_special_casing (f , to_upper , to_lower , to_title ):
158
- fetch (f )
159
- for line in fileinput . input ( f ) :
159
+ def load_special_casing (to_upper , to_lower , to_title ):
160
+ casing_data = fetch ("SpecialCasing.txt" )
161
+ for line in casing_data :
160
162
data = line .split ('#' )[0 ].split (';' )
161
163
if len (data ) == 5 :
162
164
code , lower , title , upper , _comment = data
@@ -237,13 +239,13 @@ def format_table_content(f, content, indent):
237
239
line = " " * indent + chunk
238
240
f .write (line )
239
241
240
- def load_properties (f , interestingprops ):
241
- fetch (f )
242
+ def load_properties (fname , interestingprops ):
243
+ f = fetch (fname )
242
244
props = {}
243
245
re1 = re .compile (r"^ *([0-9A-F]+) *; *(\w+)" )
244
246
re2 = re .compile (r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
245
247
246
- for line in fileinput . input ( os . path . basename ( f )) :
248
+ for line in f :
247
249
prop = None
248
250
d_lo = 0
249
251
d_hi = 0
@@ -277,19 +279,20 @@ def load_properties(f, interestingprops):
277
279
def escape_char(c):
    """Render the integer code point ``c`` as Rust character-literal source.

    Zero is written with the short NUL escape; every other value is written
    as a braced unicode escape with lowercase hexadecimal digits.  No space
    may follow the backslash, otherwise the literal is rejected by rustc.
    """
    if c == 0:
        return "'\\0'"
    return "'\\u{%x}'" % c
279
281
280
def emit_table(f, name, t_data):
    """Write a non-``pub`` Rust ``const`` slice of ``(char, [char; 3])`` rows.

    f      -- writable stream that receives the generated Rust source
    name   -- identifier used for the emitted constant
    t_data -- iterable of items where ``item[0]`` is a code point and
              ``item[1]`` holds three code points (callers pass the sorted
              ``.items()`` of a case-mapping dict)
    """
    f.write(" const %s: &[(char, [char; 3])] = &[\n " % (name,))
    # Build the comma-separated rows in one join instead of growing a string
    # row by row with a hand-maintained "first element" flag.
    data = ",".join(
        "(%s,[%s,%s,%s])" % (
            escape_char(dat[0]),
            escape_char(dat[1][0]),
            escape_char(dat[1][1]),
            escape_char(dat[1][2]),
        )
        for dat in t_data
    )
    # format_table_content handles line wrapping at the given indent.
    format_table_content(f, data, 8)
    f.write("\n ];\n \n ")
295
298
@@ -306,7 +309,7 @@ def compute_trie(rawdata, chunksize):
306
309
root .append (childmap [child ])
307
310
return (root , child_data )
308
311
309
- def emit_bool_trie (f , name , t_data , is_pub = True ):
312
+ def emit_bool_trie (f , name , t_data ):
310
313
CHUNK = 64
311
314
rawdata = [False ] * 0x110000
312
315
for (lo , hi ) in t_data :
@@ -322,10 +325,7 @@ def emit_bool_trie(f, name, t_data, is_pub=True):
322
325
chunk |= 1 << j
323
326
chunks .append (chunk )
324
327
325
- pub_string = ""
326
- if is_pub :
327
- pub_string = "pub "
328
- f .write (" %sconst %s: &super::BoolTrie = &super::BoolTrie {\n " % (pub_string , name ))
328
+ f .write (" pub const %s: &super::BoolTrie = &super::BoolTrie {\n " % (name ,))
329
329
f .write (" r1: [\n " )
330
330
data = ',' .join ('0x%016x' % chunk for chunk in chunks [0 :0x800 // CHUNK ])
331
331
format_table_content (f , data , 12 )
@@ -360,7 +360,7 @@ def emit_bool_trie(f, name, t_data, is_pub=True):
360
360
361
361
f .write (" };\n \n " )
362
362
363
- def emit_small_bool_trie (f , name , t_data , is_pub = True ):
363
+ def emit_small_bool_trie (f , name , t_data ):
364
364
last_chunk = max (hi // 64 for (lo , hi ) in t_data )
365
365
n_chunks = last_chunk + 1
366
366
chunks = [0 ] * n_chunks
@@ -370,11 +370,8 @@ def emit_small_bool_trie(f, name, t_data, is_pub=True):
370
370
print (cp , cp // 64 , len (chunks ), lo , hi )
371
371
chunks [cp // 64 ] |= 1 << (cp & 63 )
372
372
373
- pub_string = ""
374
- if is_pub :
375
- pub_string = "pub "
376
- f .write (" %sconst %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n "
377
- % (pub_string , name ))
373
+ f .write (" pub const %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n "
374
+ % (name ,))
378
375
379
376
(r1 , r2 ) = compute_trie (chunks , 1 )
380
377
@@ -427,15 +424,10 @@ def emit_conversions_module(f, to_upper, to_lower, to_title):
427
424
}
428
425
429
426
""" )
430
- t_type = "&[(char, [char; 3])]"
431
- pfun = lambda x : "(%s,[%s,%s,%s])" % (
432
- escape_char (x [0 ]), escape_char (x [1 ][0 ]), escape_char (x [1 ][1 ]), escape_char (x [1 ][2 ]))
433
427
emit_table (f , "to_lowercase_table" ,
434
- sorted (to_lower .items (), key = operator .itemgetter (0 )),
435
- is_pub = False , t_type = t_type , pfun = pfun )
428
+ sorted (to_lower .items (), key = operator .itemgetter (0 )))
436
429
emit_table (f , "to_uppercase_table" ,
437
- sorted (to_upper .items (), key = operator .itemgetter (0 )),
438
- is_pub = False , t_type = t_type , pfun = pfun )
430
+ sorted (to_upper .items (), key = operator .itemgetter (0 )))
439
431
f .write ("}\n \n " )
440
432
441
433
def emit_norm_module (f , canon , compat , combine , norm_props ):
@@ -464,8 +456,7 @@ def emit_norm_module(f, canon, compat, combine, norm_props):
464
456
rf .write (preamble )
465
457
466
458
# download and parse all the data
467
- fetch ("ReadMe.txt" )
468
- with open ("ReadMe.txt" ) as readme :
459
+ with fetch ("ReadMe.txt" ) as readme :
469
460
pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
470
461
unicode_version = re .search (pattern , readme .read ()).groups ()
471
462
rf .write ("""
@@ -480,8 +471,8 @@ def emit_norm_module(f, canon, compat, combine, norm_props):
480
471
};
481
472
""" % unicode_version )
482
473
(canon_decomp , compat_decomp , gencats , combines ,
483
- to_upper , to_lower , to_title ) = load_unicode_data ("UnicodeData.txt" )
484
- load_special_casing ("SpecialCasing.txt" , to_upper , to_lower , to_title )
474
+ to_upper , to_lower , to_title ) = load_unicode_data ()
475
+ load_special_casing (to_upper , to_lower , to_title )
485
476
want_derived = ["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ,
486
477
"Cased" , "Case_Ignorable" ]
487
478
derived = load_properties ("DerivedCoreProperties.txt" , want_derived )
0 commit comments