@@ -47,37 +47,39 @@ def fetch(f):
4747 sys .stderr .write ("cannot load %s\n " % f )
4848 exit (1 )
4949
# Implementation from unicode-segmentation
def load_properties(f, interestingprops=None):
    """Parse a Unicode property data file into per-property code point ranges.

    ``fetch(f)`` is called first (presumably to make the file available
    locally -- confirm against ``fetch``); the data is then read from the
    current directory under the base name of *f*.

    Only lines of the form ``XXXX ; Prop`` or ``XXXX..YYYY ; Prop`` (upper-case
    hex code points, optional surrounding spaces) are considered; every other
    line, such as comments and blank lines, is skipped.

    Args:
        f: Name of the data file, e.g. ``"IdentifierStatus.txt"``.
        interestingprops: Optional collection of property names to keep.
            When it is ``None`` or empty, every property found is kept.

    Returns:
        A dict mapping each property name to a list of inclusive
        ``(lo, hi)`` integer code point ranges, in the order they appear in
        the file. A single code point is stored as ``(cp, cp)``.
    """
    fetch(f)

    single_re = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
    range_re = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")

    props = {}
    # A plain ``open`` in a ``with`` block closes the file deterministically
    # and avoids the module-global state that ``fileinput.input`` keeps.
    with open(os.path.basename(f)) as data:
        for line in data:
            m = single_re.match(line)
            if m:
                lo = hi = m.group(1)
                prop = m.group(2)
            else:
                m = range_re.match(line)
                if not m:
                    # Not a data line.
                    continue
                lo, hi, prop = m.groups()

            if interestingprops and prop not in interestingprops:
                continue

            props.setdefault(prop, []).append((int(lo, 16), int(hi, 16)))

    return props
8284def format_table_content (f , content , indent ):
8385 line = " " * indent
@@ -115,41 +117,57 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
115117 format_table_content (f , data , 8 )
116118 f .write ("\n ];\n \n " )
117119
def emit_identifier_module(f):
    """Write the Rust ``identifier`` module to *f*.

    The emitted module contains the public function
    ``identifier_status_allowed(c)``, which delegates to
    ``super::util::bsearch_range_table``, followed by the private
    ``identifier_status_table`` built from the ``Allowed`` ranges of
    ``IdentifierStatus.txt``.

    Args:
        f: Writable text file object receiving the generated Rust source.

    Raises:
        KeyError: If the data file yields no ``Allowed`` ranges.
    """
    # NOTE(review): leading indentation inside the Rust templates below was
    # restored to conventional 4-space nesting -- confirm against the
    # checked-in tables.rs if byte-exact output matters.
    f.write("pub mod identifier {")
    f.write("""
    #[inline]
    pub fn identifier_status_allowed(c: char) -> bool {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_range_table(c, identifier_status_table)
        }
    }
""")

    f.write("    // Identifier status table:\n")
    # Only the 'Allowed' ranges are emitted, so ask the loader to keep just
    # those instead of accumulating every status and discarding the rest.
    identifier_status_table = load_properties("IdentifierStatus.txt", ["Allowed"])
    emit_table(f, "identifier_status_table", identifier_status_table['Allowed'],
               "&'static [(char, char)]", is_pub=False,
               pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))
    f.write("}\n\n")

def emit_util_mod(f):
    """Write the Rust ``util`` module of shared lookup helpers to *f*.

    The emitted module defines two public functions over range tables:

    * ``bsearch_range_table(c, r)`` -- ``true`` when ``c`` falls inside one
      of the inclusive ``(lo, hi)`` ranges of ``r``.
    * ``bsearch_range_value_table(c, r)`` -- ``Some(value)`` for the
      ``(lo, hi, value)`` entry whose range contains ``c``, else ``None``.

    Both use ``binary_search_by`` on the table.

    Args:
        f: Writable text file object receiving the generated Rust source.
    """
    # NOTE(review): leading indentation inside the Rust template was restored
    # to conventional 4-space nesting -- confirm against the checked-in
    # tables.rs if byte-exact output matters.
    f.write("""
pub mod util {
    use core::result::Result::{Ok, Err};
    #[inline]
    pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
        use core::cmp::Ordering::{Equal, Less, Greater};
        r.binary_search_by(|&(lo,hi)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }).is_ok()
    }

    pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
        use core::cmp::Ordering::{Equal, Less, Greater};
        match r.binary_search_by(|&(lo, hi, _)| {
            if lo <= c && c <= hi { Equal }
            else if hi < c { Less }
            else { Greater }
        }) {
            Ok(idx) => {
                let (_, _, cat) = r[idx];
                Some(cat)
            }
            Err(_) => None
        }
    }

}

""")

153171if __name__ == "__main__" :
154172 r = "tables.rs"
155173 if os .path .exists (r ):
@@ -164,6 +182,7 @@ def emit_identifier_status_module(f, statuses_table):
164182pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
165183
166184""" % UNICODE_VERSION )
167- ### identifier status module
168- identifier_status_table = load_identifier_status ()
169- emit_identifier_status_module (rf , identifier_status_table )
185+
186+ emit_util_mod (rf )
187+ ### identifier module
188+ emit_identifier_module (rf )
0 commit comments