@@ -29,3 +29,82 @@ mat!(uni_boundary_none, r"\d\b", "6δ", None);
2929mat ! ( uni_boundary_ogham, r"\d\b" , "6 " , Some ( ( 0 , 1 ) ) ) ;
3030mat ! ( uni_not_boundary_none, r"\d\B" , "6δ" , Some ( ( 0 , 1 ) ) ) ;
3131mat ! ( uni_not_boundary_ogham, r"\d\B" , "6 " , None ) ;
32+
// Test general categories.
//
// TODO: Coverage here is partial; there are many more property values than
// are listed. Generate the remaining cases with a script instead of adding
// them by hand.
37+ mat ! ( uni_class_gencat_cased_letter,
38+ r"\p{Cased_Letter}" , "A" , Some ( ( 0 , 3 ) ) ) ;
39+ mat ! ( uni_class_gencat_close_punctuation,
40+ r"\p{Close_Punctuation}" , "❯" , Some ( ( 0 , 3 ) ) ) ;
41+ mat ! ( uni_class_gencat_connector_punctuation,
42+ r"\p{Connector_Punctuation}" , "⁀" , Some ( ( 0 , 3 ) ) ) ;
43+ mat ! ( uni_class_gencat_control,
44+ r"\p{Control}" , "\u{9f} " , Some ( ( 0 , 2 ) ) ) ;
45+ mat ! ( uni_class_gencat_currency_symbol,
46+ r"\p{Currency_Symbol}" , "£" , Some ( ( 0 , 3 ) ) ) ;
47+ mat ! ( uni_class_gencat_dash_punctuation,
48+ r"\p{Dash_Punctuation}" , "〰" , Some ( ( 0 , 3 ) ) ) ;
49+ mat ! ( uni_class_gencat_decimal_numer,
50+ r"\p{Decimal_Number}" , "𑓙" , Some ( ( 0 , 4 ) ) ) ;
51+ mat ! ( uni_class_gencat_enclosing_mark,
52+ r"\p{Enclosing_Mark}" , "\u{A672} " , Some ( ( 0 , 3 ) ) ) ;
53+ mat ! ( uni_class_gencat_final_punctuation,
54+ r"\p{Final_Punctuation}" , "⸡" , Some ( ( 0 , 3 ) ) ) ;
55+ mat ! ( uni_class_gencat_format,
56+ r"\p{Format}" , "\u{E007F} " , Some ( ( 0 , 4 ) ) ) ;
57+ mat ! ( uni_class_gencat_initial_punctuation,
58+ r"\p{Initial_Punctuation}" , "⸜" , Some ( ( 0 , 3 ) ) ) ;
59+ mat ! ( uni_class_gencat_letter,
60+ r"\p{Letter}" , "Έ" , Some ( ( 0 , 2 ) ) ) ;
61+ mat ! ( uni_class_gencat_letter_number,
62+ r"\p{Letter_Number}" , "ↂ" , Some ( ( 0 , 3 ) ) ) ;
63+ mat ! ( uni_class_gencat_line_separator,
64+ r"\p{Line_Separator}" , "\u{2028} " , Some ( ( 0 , 3 ) ) ) ;
65+ mat ! ( uni_class_gencat_lowercase_letter,
66+ r"\p{Lowercase_Letter}" , "ϛ" , Some ( ( 0 , 2 ) ) ) ;
67+ mat ! ( uni_class_gencat_mark,
68+ r"\p{Mark}" , "\u{E01EF} " , Some ( ( 0 , 4 ) ) ) ;
69+ mat ! ( uni_class_gencat_math,
70+ r"\p{Math}" , "⋿" , Some ( ( 0 , 3 ) ) ) ;
71+ mat ! ( uni_class_gencat_modifier_letter,
72+ r"\p{Modifier_Letter}" , "𖭃" , Some ( ( 0 , 4 ) ) ) ;
73+ mat ! ( uni_class_gencat_modifier_symbol,
74+ r"\p{Modifier_Symbol}" , "🏿" , Some ( ( 0 , 4 ) ) ) ;
75+ mat ! ( uni_class_gencat_nonspacing_mark,
76+ r"\p{Nonspacing_Mark}" , "\u{1E94A} " , Some ( ( 0 , 4 ) ) ) ;
77+ mat ! ( uni_class_gencat_number,
78+ r"\p{Number}" , "⓿" , Some ( ( 0 , 3 ) ) ) ;
79+ mat ! ( uni_class_gencat_open_punctuation,
80+ r"\p{Open_Punctuation}" , "⦅" , Some ( ( 0 , 3 ) ) ) ;
81+ mat ! ( uni_class_gencat_other,
82+ r"\p{Other}" , "\u{bc9} " , Some ( ( 0 , 3 ) ) ) ;
83+ mat ! ( uni_class_gencat_other_letter,
84+ r"\p{Other_Letter}" , "ꓷ" , Some ( ( 0 , 3 ) ) ) ;
85+ mat ! ( uni_class_gencat_other_number,
86+ r"\p{Other_Number}" , "㉏" , Some ( ( 0 , 3 ) ) ) ;
87+ mat ! ( uni_class_gencat_other_punctuation,
88+ r"\p{Other_Punctuation}" , "𞥞" , Some ( ( 0 , 4 ) ) ) ;
89+ mat ! ( uni_class_gencat_other_symbol,
90+ r"\p{Other_Symbol}" , "⅌" , Some ( ( 0 , 3 ) ) ) ;
91+ mat ! ( uni_class_gencat_paragraph_separator,
92+ r"\p{Paragraph_Separator}" , "\u{2029} " , Some ( ( 0 , 3 ) ) ) ;
93+ mat ! ( uni_class_gencat_private_use,
94+ r"\p{Private_Use}" , "\u{10FFFD} " , Some ( ( 0 , 4 ) ) ) ;
95+ mat ! ( uni_class_gencat_punctuation,
96+ r"\p{Punctuation}" , "𑁍" , Some ( ( 0 , 4 ) ) ) ;
97+ mat ! ( uni_class_gencat_separator,
98+ r"\p{Separator}" , "\u{3000} " , Some ( ( 0 , 3 ) ) ) ;
99+ mat ! ( uni_class_gencat_space_separator,
100+ r"\p{Space_Separator}" , "\u{205F} " , Some ( ( 0 , 3 ) ) ) ;
101+ mat ! ( uni_class_gencat_spacing_mark,
102+ r"\p{Spacing_Mark}" , "\u{16F7E} " , Some ( ( 0 , 4 ) ) ) ;
103+ mat ! ( uni_class_gencat_symbol,
104+ r"\p{Symbol}" , "⯈" , Some ( ( 0 , 3 ) ) ) ;
105+ mat ! ( uni_class_gencat_titlecase_letter,
106+ r"\p{Titlecase_Letter}" , "ῼ" , Some ( ( 0 , 3 ) ) ) ;
107+ mat ! ( uni_class_gencat_unassigned,
108+ r"\p{Unassigned}" , "\u{10FFFF} " , Some ( ( 0 , 4 ) ) ) ;
109+ mat ! ( uni_class_gencat_uppercase_letter,
110+ r"\p{Uppercase_Letter}" , "Ꝋ" , Some ( ( 0 , 3 ) ) ) ;
0 commit comments