1
Fork 0

Auto merge of #24560 - kwantam:apply_table_opt, r=alexcrichton

Apply the optimization described in
https://github.com/rust-lang/regex/pull/73#issuecomment-93777126
to Rust's copy of `unicode.py`.

This shrinks librustc_unicode's tables.rs from 479kB to 456kB,
and should slightly improve performance for related operations
(e.g., is_alphabetic() and is_xid_start()).

In addition, pull in the fix from @dscorbett's commit
d25c39f86568a147f9b7080c25711fb1f98f056a in the regex repository, which
makes `load_properties()` more tolerant of whitespace
in the Unicode tables. (This fix does not result in any
changes to tables.rs, but could if the Unicode tables
change in the future.)
This commit is contained in:
bors 2015-04-18 21:30:03 +00:00
commit a16640051d
2 changed files with 656 additions and 897 deletions

View file

@ -25,7 +25,7 @@
import fileinput, re, os, sys, operator
preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@ -207,8 +207,8 @@ def format_table_content(f, content, indent):
def load_properties(f, interestingprops):
fetch(f)
props = {}
re1 = re.compile("^([0-9A-F]+) +; (\w+)")
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)")
re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
for line in fileinput.input(os.path.basename(f)):
prop = None
@ -234,6 +234,11 @@ def load_properties(f, interestingprops):
if prop not in props:
props[prop] = []
props[prop].append((d_lo, d_hi))
# optimize if possible
for prop in props:
props[prop] = group_cat(ungroup_cat(props[prop]))
return props
# load all widths of want_widths, except those in except_cats

File diff suppressed because it is too large Load diff