Skip to content

Commit a166400

Browse files
committed
Auto merge of #24560 - kwantam:apply_table_opt, r=alexcrichton
Apply the optimization described in rust-lang/regex#73 (comment) to Rust's copy of `unicode.py`. This shrinks librustc_unicode's tables.rs from 479kB to 456kB, and should slightly improve performance for related operations (e.g., is_alphabetic(), is_xid_start(), etc.). In addition, pull in the fix from @dscorbett's commit d25c39f86568a147f9b7080c25711fb1f98f056a in regex, which makes `load_properties()` more tolerant of whitespace in the Unicode tables. (This fix does not result in any changes to tables.rs, but could if the Unicode tables change in the future.)
2 parents 49a94f2 + f14d289 commit a166400

File tree

2 files changed

+656
-897
lines changed

2 files changed

+656
-897
lines changed

src/etc/unicode.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
import fileinput, re, os, sys, operator
2727

28-
preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
28+
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2929
// file at the top-level directory of this distribution and at
3030
// http://rust-lang.org/COPYRIGHT.
3131
//
@@ -207,8 +207,8 @@ def format_table_content(f, content, indent):
207207
def load_properties(f, interestingprops):
208208
fetch(f)
209209
props = {}
210-
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
211-
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")
210+
re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
211+
re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
212212

213213
for line in fileinput.input(os.path.basename(f)):
214214
prop = None
@@ -234,6 +234,11 @@ def load_properties(f, interestingprops):
234234
if prop not in props:
235235
props[prop] = []
236236
props[prop].append((d_lo, d_hi))
237+
238+
# optimize if possible
239+
for prop in props:
240+
props[prop] = group_cat(ungroup_cat(props[prop]))
241+
237242
return props
238243

239244
# load all widths of want_widths, except those in except_cats

0 commit comments

Comments
 (0)