@@ -703,6 +703,106 @@ impl [u8] {
703703 assert_eq ! ( read_offset, self . len( ) ) ;
704704 if write_offset < read_offset { Ok ( write_offset) } else { Err ( queue) }
705705 }
706+
707+ #[ rustc_allow_incoherent_impl]
708+ #[ unstable( issue = "none" , feature = "std_internals" ) ]
709+ #[ allow( dead_code) ]
710+ /// Safety:
711+ /// - Must be UTF-8
712+ pub unsafe fn make_utf8_lowercase ( & mut self ) -> Result < usize , VecDeque < u8 > > {
713+ let mut queue = VecDeque :: new ( ) ;
714+
715+ let mut read_offset = 0 ;
716+ let mut write_offset = 0 ;
717+
718+ let mut buffer = [ 0 ; 4 ] ;
719+ let mut final_sigma_automata = FinalSigmaAutomata :: new ( ) ;
720+ while let Some ( ( codepoint, width) ) =
721+ unsafe { core:: str:: next_code_point_with_width ( & mut self [ read_offset..] . iter ( ) ) }
722+ {
723+ read_offset += width;
724+ let uppercase_char = unsafe { char:: from_u32_unchecked ( codepoint) } ;
725+ if uppercase_char == 'Σ' {
726+ // Σ maps to σ, except at the end of a word where it maps to ς.
727+ // See core::str::to_lowercase
728+ let rest = unsafe { core:: str:: from_utf8_unchecked ( & self [ read_offset..] ) } ;
729+ let is_word_final =
730+ final_sigma_automata. is_accepting ( ) && !case_ignorable_then_cased ( rest. chars ( ) ) ;
731+ let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' } ;
732+ let l = sigma_lowercase. len_utf8 ( ) ;
733+ sigma_lowercase. encode_utf8 ( & mut buffer) ;
734+ queue. extend ( & buffer[ ..l] ) ;
735+ } else {
736+ for c in uppercase_char. to_lowercase ( ) {
737+ let l = c. len_utf8 ( ) ;
738+ c. encode_utf8 ( & mut buffer) ;
739+ queue. extend ( & buffer[ ..l] ) ;
740+ }
741+ }
742+ final_sigma_automata. step ( uppercase_char) ;
743+ while write_offset < read_offset {
744+ match queue. pop_front ( ) {
745+ Some ( b) => {
746+ self [ write_offset] = b;
747+ write_offset += 1 ;
748+ }
749+ None => break ,
750+ }
751+ }
752+ }
753+ assert_eq ! ( read_offset, self . len( ) ) ;
754+ return if write_offset < read_offset { Ok ( write_offset) } else { Err ( queue) } ;
755+
756+ // For now this is copy pasted from core::str, FIXME: DRY
757+ fn case_ignorable_then_cased < I : Iterator < Item = char > > ( iter : I ) -> bool {
758+ use core:: unicode:: { Case_Ignorable , Cased } ;
759+ match iter. skip_while ( |& c| Case_Ignorable ( c) ) . next ( ) {
760+ Some ( c) => Cased ( c) ,
761+ None => false ,
762+ }
763+ }
764+ }
765+ }
766+
/// Two-state tracker for the text that precedes the current position, used to
/// decide whether a `Σ` should be lowercased to the word-final form `ς`.
///
/// Characters are fed in order through `step`; `is_accepting` is queried when
/// a `Σ` is reached.
///
/// The type is a plain fieldless enum, so it is `Copy` and comparable in
/// addition to `Clone`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FinalSigmaAutomata {
    /// Start state; `is_accepting` reports `false`.
    Init,
    /// State in which `is_accepting` reports `true`.
    Accepted,
}
772+
773+ impl FinalSigmaAutomata {
774+ fn new ( ) -> Self {
775+ Self :: Init
776+ }
777+
778+ fn is_accepting ( & self ) -> bool {
779+ match self {
780+ FinalSigmaAutomata :: Accepted => true ,
781+ FinalSigmaAutomata :: Init => false ,
782+ }
783+ }
784+
785+ fn step ( & mut self , c : char ) {
786+ use core:: unicode:: { Case_Ignorable , Cased } ;
787+
788+ use FinalSigmaAutomata :: * ;
789+ * self = match self {
790+ Init => {
791+ if Cased ( c) {
792+ Accepted
793+ } else {
794+ Init
795+ }
796+ }
797+ Accepted => {
798+ if Cased ( c) || Case_Ignorable ( c) {
799+ Accepted
800+ } else {
801+ Init
802+ }
803+ }
804+ }
805+ }
706806}
707807
708808#[ cfg( not( test) ) ]
0 commit comments