@@ -6,7 +6,7 @@ use crate::slice;
66use crate :: str:: from_utf8_unchecked_mut;
77use crate :: ub_checks:: assert_unsafe_precondition;
88use crate :: unicode:: printable:: is_printable;
9- use crate :: unicode:: { self , conversions} ;
9+ use crate :: unicode:: { self , Case_Ignorable , conversions} ;
1010
1111impl char {
1212 /// The lowest valid code point a `char` can have, `'\0'`.
@@ -950,7 +950,11 @@ impl char {
950950 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
951951 #[ inline]
952952 pub fn is_control ( self ) -> bool {
953- unicode:: Cc ( self )
953+ // According to
954+ // https://www.unicode.org/policies/stability_policy.html#Property_Value,
955+ // the set of codepoints in `Cc` will never change. So we can hard-code
956+ // the patterns to match against instead of using a table.
957+ matches ! ( self , '\0' ..='\x1f' | '\x7f' ..='\u{9f}' )
954958 }
955959
956960 /// Returns `true` if this `char` has the `Grapheme_Extend` property.
@@ -965,7 +969,47 @@ impl char {
965969 #[ must_use]
966970 #[ inline]
967971 pub ( crate ) fn is_grapheme_extended ( self ) -> bool {
968- unicode:: Grapheme_Extend ( self )
972+ !self . is_ascii ( ) && unicode:: Grapheme_Extend ( self )
973+ }
974+
975+ /// Returns `true` if this `char` has the `Cased` derived property.
976+ ///
977+ /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
978+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
979+ ///
980+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
981+ /// [ucd]: https://www.unicode.org/reports/tr44/
982+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
983+ #[ must_use]
984+ #[ inline]
985+ #[ doc( hidden) ]
986+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
987+ pub fn is_cased ( self ) -> bool {
988+ if self . is_ascii ( ) {
989+ self . is_ascii_alphabetic ( )
990+ } else {
991+ unicode:: Lowercase ( self ) || unicode:: Uppercase ( self ) || unicode:: Lt ( self )
992+ }
993+ }
994+
995+ /// Returns `true` if this `char` has the `Case_Ignorable` property.
996+ ///
997+ /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
998+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
999+ ///
1000+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1001+ /// [ucd]: https://www.unicode.org/reports/tr44/
1002+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1003+ #[ must_use]
1004+ #[ inline]
1005+ #[ doc( hidden) ]
1006+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1007+ pub fn is_case_ignorable ( self ) -> bool {
1008+ if self . is_ascii ( ) {
1009+ matches ! ( self , '\'' | '.' | ':' | '^' | '`' )
1010+ } else {
1011+ Case_Ignorable ( self )
1012+ }
9691013 }
9701014
9711015 /// Returns `true` if this `char` has one of the general categories for numbers.
0 commit comments