2020# out-of-line and check the tables.rs and normalization_tests.rs files into git.
2121import collections
2222import urllib .request
23+ from itertools import batched
2324
2425UNICODE_VERSION = "15.1.0"
2526UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -354,20 +355,26 @@ def is_first_and_last(first, last):
354355 return False
355356 return first [1 :- 8 ] == last [1 :- 7 ]
356357
def gen_mph_data(name, d, kv_type, kv_callback, kv_row_width=1):
    """Emit the Rust salt and key/value tables of a minimal perfect hash.

    Two ``pub(crate) const`` slices, ``<NAME>_SALT`` and ``<NAME>_KV``, are
    written to ``out``. This function has no stream parameter, so ``out`` is
    resolved from the enclosing module scope and must be bound before the
    call.

    Args:
        name: Table name; upper-cased to build the Rust constant names.
        d: Mapping passed to ``minimal_perfect_hash``.
        kv_type: Rust element type of the ``_KV`` slice, e.g. ``"u32"``.
        kv_callback: Called with each key; returns the Rust literal to emit.
        kv_row_width: Number of ``_KV`` entries written per output line.
            Defaults to 1 so callers using the older four-argument form
            still work.

    Raises:
        ValueError: If ``kv_row_width`` is less than 1 (raised by
            ``itertools.batched``).
    """
    # Local import keeps this block independent of the file-level import
    # list; itertools.batched needs Python 3.12 or newer.
    from itertools import batched

    salt, keys = minimal_perfect_hash(d)
    const_name = name.upper()

    # Salt table: 13 zero-padded hex values per row.
    out.write(f"\npub(crate) const {const_name}_SALT: &[u16] = &[\n")
    for s_row in batched(salt, 13):
        # Three spaces plus the per-item leading space give a 4-space indent.
        out.write("   " + "".join(f" 0x{s:03X}," for s in s_row) + "\n")
    out.write("];\n")

    # Key/value table: each entry is rendered by the caller's callback.
    out.write(f"pub(crate) const {const_name}_KV: &[{kv_type}] = &[\n")
    for k_row in batched(keys, kv_row_width):
        out.write("   " + "".join(f" {kv_callback(k)}," for k in k_row) + "\n")
    out.write("];\n")
367374
def gen_combining_class(combining_classes, out):
    """Emit the ``canonical_combining_class`` perfect-hash tables.

    Args:
        combining_classes: Mapping from integer code point to a combining
            class value convertible with ``int()``.
        out: Not used directly here; ``gen_mph_data`` writes to the
            module-level ``out`` stream.
    """
    def packed_entry(k):
        # Code point shifted left by 8 bits, OR-ed with its combining class,
        # rendered as 7 upper-case hex digits.
        return f"0x{int(combining_classes[k]) | (k << 8):07X}"

    gen_mph_data('canonical_combining_class', combining_classes, 'u32',
                 packed_entry, 8)
371378
372379def gen_composition_table (canon_comp , out ):
373380 table = {}
@@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
376383 table [(c1 << 16 ) | c2 ] = c3
377384 (salt , keys ) = minimal_perfect_hash (table )
378385 gen_mph_data ('COMPOSITION_TABLE' , table , '(u32, char)' ,
379- lambda k : "(0x%s , '\\ u{%s} ')" % ( hexify ( k ), hexify ( table [ k ])) )
386+ lambda k : f "(0x{ k :08X } , '\\ u{{ { table [ k ]:06X } }} ')", 1 )
380387
381388 out .write ("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n " )
382389 out .write (" match (c1, c2) {\n " )
@@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
403410 assert offset < 65536
404411 out .write ("];\n " )
405412 gen_mph_data (name + '_decomposed' , table , "(u32, (u16, u16))" ,
406- lambda k : "(0x{:x }, ({}, {}))" . format ( k , offsets [k ], len (table [k ])) )
413+ lambda k : f "(0x{ k :05X } , (0x { offsets [k ]:03X } , 0x { len (table [k ]):X } ))" , 1 )
407414
408415def gen_qc_match (prop_table , out ):
409416 out .write (" match c {\n " )
@@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
421428 out .write (" }\n " )
422429
423430def gen_nfc_qc (prop_tables , out ):
424- out .write ("#[inline]\n " )
431+ out .write ("\n #[inline]\n " )
425432 out .write ("#[allow(ellipsis_inclusive_range_patterns)]\n " )
426433 out .write ("pub fn qc_nfc(c: char) -> IsNormalized {\n " )
427434 gen_qc_match (prop_tables ['NFC_QC' ], out )
@@ -450,13 +457,13 @@ def gen_nfkd_qc(prop_tables, out):
450457
def gen_combining_mark(general_category_mark, out):
    """Emit the ``combining_mark`` perfect-hash tables.

    Args:
        general_category_mark: Collection of integer code points passed to
            ``gen_mph_data`` as the hash input.
        out: Not used directly here; ``gen_mph_data`` writes to the
            module-level ``out`` stream.
    """
    # Each key is written as a 5-digit upper-case hex literal, 10 per row.
    gen_mph_data('combining_mark', general_category_mark, 'u32',
                 lambda k: f"0x{k:05X}", 10)
454461
455462def gen_public_assigned (general_category_public_assigned , out ):
456463 # This could be done as a hash but the table is somewhat small.
457464 out .write ("#[inline]\n " )
458465 out .write ("pub fn is_public_assigned(c: char) -> bool {\n " )
459- out .write (" match c { \n " )
466+ out .write (" matches!(c, \n " )
460467
461468 start = True
462469 for first , last in general_category_public_assigned :
@@ -469,12 +476,9 @@ def gen_public_assigned(general_category_public_assigned, out):
469476 out .write ("'\\ u{%s}'\n " % hexify (first ))
470477 else :
471478 out .write ("'\\ u{%s}'..='\\ u{%s}'\n " % (hexify (first ), hexify (last )))
472- out .write (" => true,\n " )
473479
474- out .write (" _ => false,\n " )
475- out .write (" }\n " )
480+ out .write (" )\n " )
476481 out .write ("}\n " )
477- out .write ("\n " )
478482
479483def gen_stream_safe (leading , trailing , out ):
480484 # This could be done as a hash but the table is very small.
@@ -488,10 +492,9 @@ def gen_stream_safe(leading, trailing, out):
488492 out .write (" _ => 0,\n " )
489493 out .write (" }\n " )
490494 out .write ("}\n " )
491- out .write ("\n " )
492495
493496 gen_mph_data ('trailing_nonstarters' , trailing , 'u32' ,
494- lambda k : "0x{:X}" . format ( int (trailing [k ]) | (k << 8 )) )
497+ lambda k : f "0x{ int (trailing [k ]) | (k << 8 ):07X } " , 8 )
495498
496499def gen_tests (tests , out ):
497500 out .write ("""#[derive(Debug)]
@@ -585,37 +588,28 @@ def minimal_perfect_hash(d):
585588
586589 version = "(%s, %s, %s)" % tuple (UNICODE_VERSION .split ("." ))
587590 out .write ("#[allow(unused)]\n " )
588- out .write ("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n \n " % version )
591+ out .write ("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n " % version )
589592
590593 gen_combining_class (data .combining_classes , out )
591- out .write ("\n " )
592594
593595 gen_composition_table (data .canon_comp , out )
594- out .write ("\n " )
595596
596597 gen_decomposition_tables (data .canon_fully_decomp , data .compat_fully_decomp , data .cjk_compat_variants_fully_decomp , out )
597598
598599 gen_combining_mark (data .general_category_mark , out )
599- out .write ("\n " )
600600
601601 gen_public_assigned (data .general_category_public_assigned , out )
602- out .write ("\n " )
603602
604603 gen_nfc_qc (data .norm_props , out )
605- out .write ("\n " )
606604
607605 gen_nfkc_qc (data .norm_props , out )
608- out .write ("\n " )
609606
610607 gen_nfd_qc (data .norm_props , out )
611- out .write ("\n " )
612608
613609 gen_nfkd_qc (data .norm_props , out )
614- out .write ("\n " )
615610
616611 gen_stream_safe (data .ss_leading , data .ss_trailing , out )
617- out .write ("\n " )
618612
619613 with open ("normalization_tests.rs" , "w" , newline = "\n " ) as out :
620614 out .write (PREAMBLE )
621- gen_tests (data .norm_tests , out )
615+ gen_tests (data .norm_tests , out )
0 commit comments