@@ -1555,18 +1555,13 @@ impl<'h> Match<'h> {
15551555
15561556impl < ' h > core:: fmt:: Debug for Match < ' h > {
15571557 fn fmt ( & self , f : & mut core:: fmt:: Formatter ) -> core:: fmt:: Result {
1558+ use regex_automata:: util:: escape:: DebugHaystack ;
1559+
15581560 let mut fmt = f. debug_struct ( "Match" ) ;
1559- fmt. field ( "start" , & self . start ) . field ( "end" , & self . end ) ;
1560- if let Ok ( s) = core:: str:: from_utf8 ( self . as_bytes ( ) ) {
1561- fmt. field ( "bytes" , & s) ;
1562- } else {
1563- // FIXME: It would be nice if this could be printed as a string
1564- // with invalid UTF-8 replaced with hex escapes. A alloc would
1565- // probably okay if that makes it easier, but regex-automata does
1566- // (at time of writing) have internal routines that do this. So
1567- // maybe we should expose them.
1568- fmt. field ( "bytes" , & self . as_bytes ( ) ) ;
1569- }
1561+ fmt. field ( "start" , & self . start )
1562+ . field ( "end" , & self . end )
1563+ . field ( "bytes" , & DebugHaystack ( & self . as_bytes ( ) ) ) ;
1564+
15701565 fmt. finish ( )
15711566 }
15721567}
@@ -2620,3 +2615,88 @@ fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> {
26202615 None => Some ( Cow :: Borrowed ( replacement) ) ,
26212616 }
26222617}
2618+
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::format;

    /// The accessors report the span offsets, its length, and the matched
    /// sub-slice of the haystack.
    #[test]
    fn test_match_properties() {
        let haystack = b"Hello, world!";
        let m = Match::new(haystack, 7, 12);

        assert_eq!(m.start(), 7);
        assert_eq!(m.end(), 12);
        assert!(!m.is_empty());
        assert_eq!(m.len(), 5);
        assert_eq!(m.as_bytes(), b"world");
    }

    /// A zero-width match over an empty haystack is empty and has length 0.
    #[test]
    fn test_empty_match() {
        let haystack = b"";
        let m = Match::new(haystack, 0, 0);

        assert!(m.is_empty());
        assert_eq!(m.len(), 0);
    }

    /// Valid UTF-8 is rendered as an ordinary quoted string.
    #[test]
    fn test_debug_output_valid_utf8() {
        let haystack = b"Hello, world!";
        let m = Match::new(haystack, 7, 12);
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 7, end: 12, bytes: "world" }"#
        );
    }

    /// A byte that is not valid UTF-8 is rendered as a `\x..` hex escape.
    #[test]
    fn test_debug_output_invalid_utf8() {
        // Byte 7 is the lone 0xFF; bytes 8..13 are "world".
        let haystack = b"Hello, \xFFworld!";
        let m = Match::new(haystack, 7, 13);
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 7, end: 13, bytes: "\xffworld" }"#
        );
    }

    /// Multi-byte scalar values (emoji, Hangul, Arabic) are printed verbatim.
    #[test]
    fn test_debug_output_various_unicode() {
        let haystack =
            "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!".as_bytes();
        let m = Match::new(haystack, 0, haystack.len());
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 0, end: 62, bytes: "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!" }"#
        );
    }

    /// ASCII control bytes are escaped: tab and newline use their short
    /// forms, ESC is printed as `\u{1b}`.
    #[test]
    fn test_debug_output_ascii_escape() {
        // 38 bytes in total; the expected output below depends on that length.
        let haystack = b"Hello,\tworld!\nThis is a \x1b[31mtest\x1b[0m.";
        let m = Match::new(haystack, 0, haystack.len());
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 0, end: 38, bytes: "Hello,\tworld!\nThis is a \u{1b}[31mtest\u{1b}[0m." }"#
        );
    }

    /// Only the matched span is printed, not the surrounding haystack.
    #[test]
    fn test_debug_output_match_in_middle() {
        let haystack = b"The quick brown fox jumps over the lazy dog.";
        let m = Match::new(haystack, 16, 19);
        let debug_str = format!("{:?}", m);

        assert_eq!(debug_str, r#"Match { start: 16, end: 19, bytes: "fox" }"#);
    }
}
0 commit comments