1919# programs". It is not meant to be a complete implementation of unicode.
2020# For that we recommend you use a proper binding to libicu.
2121
22- import fileinput , re , os , sys
22+ import fileinput , re , os , sys , operator
2323
2424
2525def fetch (f ):
@@ -35,6 +35,8 @@ def fetch(f):
3535def load_unicode_data (f ):
3636 fetch (f )
3737 gencats = {}
38+ upperlower = {}
39+ lowerupper = {}
3840 combines = []
3941 canon_decomp = {}
4042 compat_decomp = {}
@@ -44,6 +46,7 @@ def load_unicode_data(f):
4446 c_hi = 0
4547 com_lo = 0
4648 com_hi = 0
49+
4750 for line in fileinput .input (f ):
4851 fields = line .split (";" )
4952 if len (fields ) != 15 :
@@ -52,7 +55,17 @@ def load_unicode_data(f):
5255 decomp , deci , digit , num , mirror ,
5356 old , iso , upcase , lowcase , titlecase ] = fields
5457
55- code = int (code , 16 )
58+ code_org = code
59+ code = int (code , 16 )
60+
61+ # generate char to char direct common and simple conversions
62+ # uppercase to lowercase
63+ if gencat == "Lu" and lowcase != "" and code_org != lowcase :
64+ upperlower [code ] = int (lowcase , 16 )
65+
66+ # lowercase to uppercase
67+ if gencat == "Ll" and upcase != "" and code_org != upcase :
68+ lowerupper [code ] = int (upcase , 16 )
5669
5770 if decomp != "" :
5871 if decomp .startswith ('<' ):
@@ -96,7 +109,7 @@ def load_unicode_data(f):
96109 com_lo = code
97110 com_hi = code
98111
99- return (canon_decomp , compat_decomp , gencats , combines )
112+ return (canon_decomp , compat_decomp , gencats , combines , lowerupper , upperlower )
100113
101114def load_properties (f , interestingprops ):
102115 fetch (f )
@@ -147,25 +160,28 @@ def ch_prefix(ix):
147160
def emit_bsearch_range_table(f):
    """Write the shared Rust helper ``bsearch_range_table`` to *f*.

    The emitted Rust function takes a ``char`` and a static slice of
    ``(lo, hi)`` pairs and reports whether the char falls inside any pair,
    using the slice's ``bsearch`` with a three-way comparator.

    f: any object with a ``write(str)`` method; receives the Rust source.
    """
    # The literal is emitted verbatim; it is Rust source, not Python.
    rust_helper = """
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
use cmp::{Equal, Less, Greater};
use vec::ImmutableVector;
use option::None;
r.bsearch(|&(lo,hi)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) != None
}\n \n
"""
    f.write(rust_helper)
161174
162175def emit_property_module (f , mod , tbl ):
163176 f .write ("pub mod %s {\n " % mod )
164177 keys = tbl .keys ()
165178 keys .sort ()
166- emit_bsearch_range_table ( f );
179+
167180 for cat in keys :
168- if cat == "Cs" : continue
181+ if cat not in ["Nd" , "Nl" , "No" , "Cc" ,
182+ "XID_Start" , "XID_Continue" , "Alphabetic" ,
183+ "Lowercase" , "Uppercase" , "White_Space" ]:
184+ continue
169185 f .write (" static %s_table : &'static [(char,char)] = &[\n " % cat )
170186 ix = 0
171187 for pair in tbl [cat ]:
@@ -175,35 +191,55 @@ def emit_property_module(f, mod, tbl):
175191 f .write ("\n ];\n \n " )
176192
177193 f .write (" pub fn %s(c: char) -> bool {\n " % cat )
178- f .write (" bsearch_range_table(c, %s_table)\n " % cat )
194+ f .write (" super:: bsearch_range_table(c, %s_table)\n " % cat )
179195 f .write (" }\n \n " )
180196 f .write ("}\n " )
181197
182198
def emit_conversions_module(f, lowerupper, upperlower):
    """Write the Rust ``conversions`` module with simple case mappings.

    The emitted module contains ``to_lower`` and ``to_upper``, which look a
    char up in ``LuLl_table`` / ``LlLu_table`` through the private
    ``bsearch_case_table`` helper and return the char unchanged when no
    entry is found, followed by the two tables themselves.

    f: any object with a ``write(str)`` method; receives the Rust source.
    lowerupper: mapping of lowercase code point -> uppercase code point,
        emitted as ``LlLu_table``.
    upperlower: mapping of uppercase code point -> lowercase code point,
        emitted as ``LuLl_table``.
    """
    f.write("pub mod conversions {\n ")

    # Rust prelude and lookup functions, emitted verbatim.
    rust_functions = """
use cmp::{Equal, Less, Greater};
use vec::ImmutableVector;
use tuple::Tuple2;
use option::{Option, Some, None};

pub fn to_lower(c: char) -> char {
match bsearch_case_table(c, LuLl_table) {
None => c,
Some(index) => LuLl_table[index].val1()
}
}

pub fn to_upper(c: char) -> char {
match bsearch_case_table(c, LlLu_table) {
None => c,
Some(index) => LlLu_table[index].val1()
}
}

fn bsearch_case_table(c: char, table: &'static [(char, char)]) -> Option<uint> {
table.bsearch(|&(key, _)| {
if c == key { Equal }
else if key < c { Less }
else { Greater }
})
}
"""
    f.write(rust_functions)

    # Table order matches the original: upper->lower first, then lower->upper.
    for table_name, mapping in (("LuLl", upperlower), ("LlLu", lowerupper)):
        emit_caseconversion_table(f, table_name, mapping)

    f.write("}\n ")
206232
def emit_caseconversion_table(f, name, table):
    """Write one Rust ``(char, char)`` case-conversion table to *f*.

    The table is emitted as ``static <name>_table`` with its entries in
    ascending order of the source code point, which the generated Rust
    lookup relies on because it binary-searches the table by key.

    f: any object with a ``write(str)`` method; receives the Rust source.
    name: prefix of the emitted static, e.g. ``"LuLl"`` -> ``LuLl_table``.
    table: dict mapping a source code point to its converted code point.
    """
    f.write(" static %s_table : &'static [(char, char)] = &[\n " % name)
    # items() instead of the Python-2-only iteritems(): sorted() materialises
    # the pairs either way, and items() also exists on Python 3.
    sorted_pairs = sorted(table.items(), key=operator.itemgetter(0))
    for ix, (key, value) in enumerate(sorted_pairs):
        # ch_prefix(ix) supplies the text written before the ix-th entry.
        f.write(ch_prefix(ix))
        f.write("(%s, %s)" % (escape_char(key), escape_char(value)))
    f.write("\n ];\n \n ")
242+
207243def format_table_content (f , content , indent ):
208244 line = " " * indent
209245 first = True
@@ -359,7 +395,8 @@ def emit_decomp_module(f, canon, compat, combine):
359395 os .remove (i );
360396rf = open (r , "w" )
361397
362- (canon_decomp , compat_decomp , gencats , combines ) = load_unicode_data ("UnicodeData.txt" )
398+ (canon_decomp , compat_decomp , gencats ,
399+ combines , lowerupper , upperlower ) = load_unicode_data ("UnicodeData.txt" )
363400
364401# Preamble
365402rf .write ('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
@@ -379,13 +416,16 @@ def emit_decomp_module(f, canon, compat, combine):
379416
380417''' )
381418
419+ emit_bsearch_range_table (rf );
382420emit_property_module (rf , "general_category" , gencats )
383421
384422emit_decomp_module (rf , canon_decomp , compat_decomp , combines )
385423
386424derived = load_properties ("DerivedCoreProperties.txt" ,
387425 ["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ])
426+
388427emit_property_module (rf , "derived_property" , derived )
389428
390429props = load_properties ("PropList.txt" , ["White_Space" ])
391430emit_property_module (rf , "property" , props )
431+ emit_conversions_module (rf , lowerupper , upperlower )
0 commit comments