# Since this should not require frequent updates, we just store this
# out-of-line and check the generated tables.rs and normalization_tests.rs
# files into git.
2020import collections
21- import requests
21+ import urllib . request
2222
2323UNICODE_VERSION = "9.0.0"
2424UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -68,9 +68,9 @@ def __init__(self):
6868
6969 def stats (name , table ):
7070 count = sum (len (v ) for v in table .values ())
71- print "%s: %d chars => %d decomposed chars" % (name , len (table ), count )
71+ print ( "%s: %d chars => %d decomposed chars" % (name , len (table ), count ) )
7272
73- print "Decomposition table stats:"
73+ print ( "Decomposition table stats:" )
7474 stats ("Canonical decomp" , self .canon_decomp )
7575 stats ("Compatible decomp" , self .compat_decomp )
7676 stats ("Canonical fully decomp" , self .canon_fully_decomp )
@@ -79,8 +79,8 @@ def stats(name, table):
7979 self .ss_leading , self .ss_trailing = self ._compute_stream_safe_tables ()
8080
def _fetch(self, filename):
    """Download one Unicode Character Database file and return its text.

    The file is requested from ``UCD_URL + filename``. The response body
    is read in full and decoded as UTF-8.

    Args:
        filename: Name of the data file, appended verbatim to ``UCD_URL``.

    Returns:
        The decoded contents of the file as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Use the response as a context manager so the underlying connection
    # is released as soon as the body has been read, instead of staying
    # open until the object happens to be garbage collected.
    with urllib.request.urlopen(UCD_URL + filename) as resp:
        return resp.read().decode('utf-8')
8484
8585 def _load_unicode_data (self ):
8686 self .combining_classes = {}
@@ -234,7 +234,7 @@ def _decompose(char_int, compatible):
234234 # need to store their overlap when they agree. When they don't agree,
235235 # store the decomposition in the compatibility table since we'll check
236236 # that first when normalizing to NFKD.
237- assert canon_fully_decomp <= compat_fully_decomp
237+ assert set ( canon_fully_decomp ) <= set ( compat_fully_decomp )
238238
239239 for ch in set (canon_fully_decomp ) & set (compat_fully_decomp ):
240240 if canon_fully_decomp [ch ] == compat_fully_decomp [ch ]:
@@ -284,15 +284,15 @@ def _compute_stream_safe_tables(self):
284284
285285 return leading_nonstarters , trailing_nonstarters
286286
def hexify(c):
    """Return code point ``c`` as uppercase hex, zero-padded to 4 digits.

    Values wider than four hex digits are emitted in full (no truncation),
    so the result can be dropped straight into a Rust ``\\u{...}`` escape.
    """
    # A named function instead of a lambda bound to a name: it gets a real
    # __name__ in tracebacks and a place to hang this documentation.
    return '{:04X}'.format(c)
288288
289289def gen_combining_class (combining_classes , out ):
290290 out .write ("#[inline]\n " )
291291 out .write ("pub fn canonical_combining_class(c: char) -> u8 {\n " )
292292 out .write (" match c {\n " )
293293
294294 for char , combining_class in sorted (combining_classes .items ()):
295- out .write (" '\u{%s}' => %s,\n " % (hexify (char ), combining_class ))
295+ out .write (" '\\ u{%s}' => %s,\n " % (hexify (char ), combining_class ))
296296
297297 out .write (" _ => 0,\n " )
298298 out .write (" }\n " )
@@ -304,7 +304,7 @@ def gen_composition_table(canon_comp, out):
304304 out .write (" match (c1, c2) {\n " )
305305
306306 for (c1 , c2 ), c3 in sorted (canon_comp .items ()):
307- out .write (" ('\u{%s}', '\u{%s}') => Some('\u{%s}'),\n " % (hexify (c1 ), hexify (c2 ), hexify (c3 )))
307+ out .write (" ('\\ u{%s}', '\\ u{%s}') => Some('\ \ u{%s}'),\n " % (hexify (c1 ), hexify (c2 ), hexify (c3 )))
308308
309309 out .write (" _ => None,\n " )
310310 out .write (" }\n " )
@@ -323,8 +323,8 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, out):
323323 out .write (" Some(match c {\n " )
324324
325325 for char , chars in sorted (table .items ()):
326- d = ", " .join ("'\u{%s}'" % hexify (c ) for c in chars )
327- out .write (" '\u{%s}' => &[%s],\n " % (hexify (char ), d ))
326+ d = ", " .join ("'\\ u{%s}'" % hexify (c ) for c in chars )
327+ out .write (" '\\ u{%s}' => &[%s],\n " % (hexify (char ), d ))
328328
329329 out .write (" _ => return None,\n " )
330330 out .write (" })\n " )
@@ -375,8 +375,8 @@ def gen_combining_mark(general_category_mark, out):
375375 out .write ("pub fn is_combining_mark(c: char) -> bool {\n " )
376376 out .write (" match c {\n " )
377377
378- for char in general_category_mark :
379- out .write (" '\u{%s}' => true,\n " % hexify (char ))
378+ for char in sorted ( general_category_mark ) :
379+ out .write (" '\\ u{%s}' => true,\n " % hexify (char ))
380380
381381 out .write (" _ => false,\n " )
382382 out .write (" }\n " )
@@ -387,8 +387,8 @@ def gen_stream_safe(leading, trailing, out):
387387 out .write ("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n " )
388388 out .write (" match c {\n " )
389389
390- for char , num_leading in leading .items ():
391- out .write (" '\u{%s}' => %d,\n " % (hexify (char ), num_leading ))
390+ for char , num_leading in sorted ( leading .items () ):
391+ out .write (" '\\ u{%s}' => %d,\n " % (hexify (char ), num_leading ))
392392
393393 out .write (" _ => 0,\n " )
394394 out .write (" }\n " )
@@ -399,8 +399,8 @@ def gen_stream_safe(leading, trailing, out):
399399 out .write ("pub fn stream_safe_trailing_nonstarters(c: char) -> usize {\n " )
400400 out .write (" match c {\n " )
401401
402- for char , num_trailing in trailing .items ():
403- out .write (" '\u{%s}' => %d,\n " % (hexify (char ), num_trailing ))
402+ for char , num_trailing in sorted ( trailing .items () ):
403+ out .write (" '\\ u{%s}' => %d,\n " % (hexify (char ), num_trailing ))
404404
405405 out .write (" _ => 0,\n " )
406406 out .write (" }\n " )
@@ -419,7 +419,7 @@ def gen_tests(tests, out):
419419""" )
420420
421421 out .write ("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n " )
422- str_literal = lambda s : '"%s"' % "" .join ("\u{%s}" % c for c in s )
422+ str_literal = lambda s : '"%s"' % "" .join ("\\ u{%s}" % c for c in s )
423423
424424 for test in tests :
425425 out .write (" NormalizationTest {\n " )
@@ -434,7 +434,7 @@ def gen_tests(tests, out):
434434
435435if __name__ == '__main__' :
436436 data = UnicodeData ()
437- with open ("tables.rs" , "w" ) as out :
437+ with open ("tables.rs" , "w" , newline = " \n " ) as out :
438438 out .write (PREAMBLE )
439439 out .write ("use quick_check::IsNormalized;\n " )
440440 out .write ("use quick_check::IsNormalized::*;\n " )
@@ -470,6 +470,6 @@ def gen_tests(tests, out):
470470 gen_stream_safe (data .ss_leading , data .ss_trailing , out )
471471 out .write ("\n " )
472472
473- with open ("normalization_tests.rs" , "w" ) as out :
473+ with open ("normalization_tests.rs" , "w" , newline = " \n " ) as out :
474474 out .write (PREAMBLE )
475475 gen_tests (data .norm_tests , out )
0 commit comments