@@ -680,7 +680,7 @@ impl CharClass {
680680 self . canonicalize ( )
681681 }
682682
683- /// Canonicalze any sequence of ranges.
683+ /// Canonicalize any sequence of ranges.
684684 ///
685685 /// This is responsible for enforcing the canonical format invariants
686686 /// as described on the docs for the `CharClass` type.
@@ -703,6 +703,41 @@ impl CharClass {
703703 ordered
704704 }
705705
706+ /// Calculate the intersection of two canonical character classes.
707+ ///
708+ /// The returned intersection is canonical.
709+ fn intersection ( & self , other : & CharClass ) -> CharClass {
710+ if self . ranges . is_empty ( ) || other. ranges . is_empty ( ) {
711+ return CharClass :: empty ( ) ;
712+ }
713+
714+ let mut intersection = CharClass :: empty ( ) ;
715+
716+ let mut iter_a = self . ranges . iter ( ) ;
717+ let mut iter_b = other. ranges . iter ( ) ;
718+ let mut a = iter_a. next ( ) . unwrap ( ) ;
719+ let mut b = iter_b. next ( ) . unwrap ( ) ;
720+ loop {
721+ if let Some ( i) = a. intersection ( & b) {
722+ intersection. ranges . push ( i) ;
723+ }
724+
725+ // If the range with the smaller end didn't match this time,
726+ // it won't ever match, so move on to the next one.
727+ let ( iter, item) = if a. end < b. end {
728+ ( & mut iter_a, & mut a)
729+ } else {
730+ ( & mut iter_b, & mut b)
731+ } ;
732+ match iter. next ( ) {
733+ Some ( v) => * item = v,
734+ None => break , // no more ranges to check, done
735+ }
736+ }
737+
738+ intersection. canonicalize ( )
739+ }
740+
706741 /// Negates the character class.
707742 ///
708743 /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
@@ -801,6 +836,18 @@ impl ClassRange {
801836 max ( self . start , other. start ) <= inc_char ( min ( self . end , other. end ) )
802837 }
803838
839+ /// Returns the intersection of the two ranges if they have common
840+ /// characters, `None` otherwise.
841+ fn intersection ( & self , other : & ClassRange ) -> Option < ClassRange > {
842+ let start = max ( self . start , other. start ) ;
843+ let end = min ( self . end , other. end ) ;
844+ if start <= end {
845+ Some ( ClassRange :: new ( start, end) )
846+ } else {
847+ None
848+ }
849+ }
850+
804851 /// Creates a new range representing the union of `self` and `other`.
805852 fn merge ( self , other : ClassRange ) -> ClassRange {
806853 ClassRange {
@@ -1907,6 +1954,108 @@ mod tests {
19071954 ] ) ) ;
19081955 }
19091956
#[test]
fn class_intersection_empty() {
    // Intersecting an empty class with anything yields an empty class.
    let nothing = class(&[]);
    let just_a = class(&[('a', 'a')]);
    let expected = class(&[]);
    assert_intersection(nothing, just_a, expected);
}
1963+
#[test]
fn class_intersection_single_equal() {
    // Two identical single-character classes intersect to that character.
    let left = class(&[('a', 'a')]);
    let right = class(&[('a', 'a')]);
    let expected = class(&[('a', 'a')]);
    assert_intersection(left, right, expected);
}
1970+
#[test]
fn class_intersection_single_unequal() {
    // Distinct single characters have nothing in common.
    let just_a = class(&[('a', 'a')]);
    let just_b = class(&[('b', 'b')]);
    let expected = class(&[]);
    assert_intersection(just_a, just_b, expected);
}
1977+
#[test]
fn class_intersection_single_in_other() {
    // A single character lying inside a wider range survives unchanged.
    let just_a = class(&[('a', 'a')]);
    let a_to_c = class(&[('a', 'c')]);
    let expected = class(&[('a', 'a')]);
    assert_intersection(just_a, a_to_c, expected);
}
1984+
#[test]
fn class_intersection_range_in_other() {
    // A range sharing its start with a wider range is returned as-is.
    let a_to_b = class(&[('a', 'b')]);
    let a_to_c = class(&[('a', 'c')]);
    let expected = class(&[('a', 'b')]);
    assert_intersection(a_to_b, a_to_c, expected);
}
1991+
#[test]
fn class_intersection_range_intersection() {
    // Ranges overlapping in exactly one character yield that character.
    let a_to_b = class(&[('a', 'b')]);
    let b_to_c = class(&[('b', 'c')]);
    let expected = class(&[('b', 'b')]);
    assert_intersection(a_to_b, b_to_c, expected);
}
1998+
#[test]
fn class_intersection_only_adjacent() {
    // Ranges that touch but share no character produce an empty class.
    let a_to_b = class(&[('a', 'b')]);
    let c_to_d = class(&[('c', 'd')]);
    let expected = class(&[]);
    assert_intersection(a_to_b, c_to_d, expected);
}
2005+
#[test]
fn class_intersection_range_subset() {
    // A range strictly inside another is the whole intersection.
    let inner = class(&[('b', 'c')]);
    let outer = class(&[('a', 'd')]);
    let expected = class(&[('b', 'c')]);
    assert_intersection(inner, outer, expected);
}
2012+
#[test]
fn class_intersection_many_ranges_in_one_big() {
    // One wide range covering several smaller ones leaves them untouched.
    let pieces = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let cover = class(&[('a', 'h')]);
    let expected = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(pieces, cover, expected);
}
2021+
#[test]
fn class_intersection_many_ranges_same() {
    // Intersecting a multi-range class with an equal class returns it.
    let left = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let right = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let expected = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(left, right, expected);
}
2030+
#[test]
fn class_intersection_multiple_non_intersecting() {
    // Interleaved ranges with gaps between them never overlap.
    let left = class(&[('a', 'b'), ('g', 'h')]);
    let right = class(&[('d', 'e'), ('k', 'l')]);
    let expected = class(&[]);
    assert_intersection(left, right, expected);
}
2037+
#[test]
fn class_intersection_non_intersecting_then_intersecting() {
    // Only the last range of the first class contains `h`; the earlier
    // ranges have to be skipped before the match is found.
    let several = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let just_h = class(&[('h', 'h')]);
    let expected = class(&[('h', 'h')]);
    assert_intersection(several, just_h, expected);
}
2044+
#[test]
fn class_intersection_adjacent_alternating() {
    // The two classes alternate ranges that touch but never share a
    // character, so the intersection is empty.
    let left = class(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
    let right = class(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
    let expected = class(&[]);
    assert_intersection(left, right, expected);
}
2051+
#[test]
fn class_intersection_overlapping_alternating() {
    // The first class covers a through f and the second covers b through
    // g, so the shared characters form the single run b through f.
    let left = class(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
    let right = class(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
    let expected = class(&[('b', 'f')]);
    assert_intersection(left, right, expected);
}
2058+
19102059 #[ test]
19112060 fn class_canon_overlap_many_case_fold ( ) {
19122061 let cls = class ( & [
@@ -2056,4 +2205,10 @@ mod tests {
20562205 let expr = e ( "(?-u)[-./]" ) ;
20572206 assert_eq ! ( "(?-u:[-\\ .-/])" , expr. to_string( ) ) ;
20582207 }
2208+
2209+ fn assert_intersection ( cls1 : CharClass , cls2 : CharClass , expected : CharClass ) {
2210+ // intersection operation should be commutative
2211+ assert_eq ! ( cls1. intersection( & cls2) , expected) ;
2212+ assert_eq ! ( cls2. intersection( & cls1) , expected) ;
2213+ }
20592214}
0 commit comments