11use std:: {
2+ borrow:: Cow ,
23 collections:: HashMap ,
34 fmt,
45 ops:: Index ,
@@ -8,8 +9,10 @@ use std::{
89
910use log:: debug;
1011use pcre2_sys:: {
11- PCRE2_CASELESS , PCRE2_DOTALL , PCRE2_EXTENDED , PCRE2_MATCH_INVALID_UTF ,
12- PCRE2_MULTILINE , PCRE2_NEVER_UTF , PCRE2_NEWLINE_ANYCRLF , PCRE2_UCP ,
12+ PCRE2_CASELESS , PCRE2_DOTALL , PCRE2_ERROR_NOMEMORY , PCRE2_EXTENDED ,
13+ PCRE2_MATCH_INVALID_UTF , PCRE2_MULTILINE , PCRE2_NEVER_UTF ,
14+ PCRE2_NEWLINE_ANYCRLF , PCRE2_SUBSTITUTE_EXTENDED , PCRE2_SUBSTITUTE_GLOBAL ,
15+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH , PCRE2_SUBSTITUTE_UNSET_EMPTY , PCRE2_UCP ,
1316 PCRE2_UNSET , PCRE2_UTF ,
1417} ;
1518
@@ -623,6 +626,127 @@ impl<W: CodeUnitWidth> Regex<W> {
623626 pub ( crate ) fn get_capture_names_idxs ( & self ) -> & HashMap < String , usize > {
624627 & self . capture_names_idx
625628 }
629+
630+ /// Replace the first match in the subject string with the replacement
631+ /// If `extended` is true, enable PCRE2's extended replacement syntax.
632+ pub fn replace < ' s > (
633+ & self ,
634+ subject : & ' s [ W :: SubjectChar ] ,
635+ replacement : & [ W :: SubjectChar ] ,
636+ extended : bool ,
637+ ) -> Result < Cow < ' s , [ W :: SubjectChar ] > , Error >
638+ where
639+ [ <W as CodeUnitWidth >:: PCRE2_CHAR ] : ToOwned ,
640+ {
641+ self . replace_impl ( subject, replacement, false , extended)
642+ }
643+
644+ /// Replace all non-overlapping matches in the subject string with the replacement
645+ /// If `extended` is true, enable PCRE2's extended replacement syntax.
646+ pub fn replace_all < ' s > (
647+ & self ,
648+ subject : & ' s [ W :: SubjectChar ] ,
649+ replacement : & [ W :: SubjectChar ] ,
650+ extended : bool ,
651+ ) -> Result < Cow < ' s , [ W :: SubjectChar ] > , Error >
652+ where
653+ [ <W as CodeUnitWidth >:: PCRE2_CHAR ] : ToOwned ,
654+ {
655+ self . replace_impl ( subject, replacement, true , extended)
656+ }
657+
658+ #[ inline]
659+ fn replace_impl < ' s > (
660+ & self ,
661+ subject : & ' s [ W :: SubjectChar ] ,
662+ replacement : & [ W :: SubjectChar ] ,
663+ replace_all : bool ,
664+ extended : bool ,
665+ ) -> Result < Cow < ' s , [ W :: SubjectChar ] > , Error >
666+ where
667+ [ <W as CodeUnitWidth >:: PCRE2_CHAR ] : ToOwned ,
668+ {
669+ let mut options: u32 = 0 ;
670+ options |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH ;
671+ // TODO: this should probably be configurable from user-side
672+ options |= PCRE2_SUBSTITUTE_UNSET_EMPTY ;
673+ if extended {
674+ options |= PCRE2_SUBSTITUTE_EXTENDED ;
675+ }
676+ if replace_all {
677+ options |= PCRE2_SUBSTITUTE_GLOBAL ;
678+ }
679+
680+ // We prefer to allocate on the stack but fall back to the heap.
681+ // Note that PCRE2 has the following behavior with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH:
682+ // - We supply the initial output buffer size in `capacity`. This should have sufficient
683+ // capacity for the terminating NUL character.
684+ // - If the capacity is NOT sufficient, PCRE2 returns the new required capacity, also
685+ // including the terminating NUL character.
686+ // - If the capacity IS sufficient, PCRE2 returns the number of characters written, NOT
687+ // including the terminating NUL character.
688+ // Example: our initial capacity is 256. If the returned string needs to be of length 512,
689+ // then PCRE2 will report NOMEMORY and set capacity to 513. After reallocating we pass in
690+ // a capacity of 513; it succeeds and sets capacity to 512, which is the length of the result.
691+ let mut stack_storage: [ W :: PCRE2_CHAR ; 256 ] =
692+ [ W :: PCRE2_CHAR :: default ( ) ; 256 ] ;
693+ let mut heap_storage = Vec :: new ( ) ;
694+ let mut output = stack_storage. as_mut ( ) ;
695+ let mut capacity = output. len ( ) ;
696+
697+ let mut rc = unsafe {
698+ self . code . substitute (
699+ subject,
700+ replacement,
701+ 0 ,
702+ options,
703+ output,
704+ & mut capacity,
705+ )
706+ } ;
707+
708+ if let Err ( e) = & rc {
709+ if e. code ( ) == PCRE2_ERROR_NOMEMORY {
710+ if heap_storage. try_reserve_exact ( capacity) . is_err ( ) {
711+ return Err ( rc. unwrap_err ( ) ) ;
712+ }
713+ heap_storage. resize ( capacity, W :: PCRE2_CHAR :: default ( ) ) ;
714+ output = & mut heap_storage;
715+ capacity = output. len ( ) ;
716+ rc = unsafe {
717+ self . code . substitute (
718+ subject,
719+ replacement,
720+ 0 ,
721+ options,
722+ output,
723+ & mut capacity,
724+ )
725+ } ;
726+ }
727+ }
728+
729+ let s = match rc? {
730+ 0 => Cow :: Borrowed ( subject) ,
731+ _ => {
732+ // capacity has been updated with the length of the result (excluding nul terminator).
733+ let output = & output[ ..capacity] ;
734+
735+ // All inputs contained valid chars, so we expect all outputs to as well.
736+ let to_char = |c : W :: PCRE2_CHAR | -> W :: SubjectChar {
737+ c. try_into ( ) . unwrap_or_else ( |_| {
738+ panic ! ( "all output expected to be valid chars" )
739+ } )
740+ } ;
741+
742+ // this is really just a type cast
743+ let x: Vec < W :: SubjectChar > =
744+ output. iter ( ) . copied ( ) . map ( to_char) . collect ( ) ;
745+ Cow :: Owned ( x)
746+ }
747+ } ;
748+ Ok ( s)
749+ }
626750}
627751
628752/// Advanced or "lower level" search methods.
@@ -870,7 +994,7 @@ impl<W: CodeUnitWidth> CaptureLocations<W> {
870994 }
871995}
872996
873- /// Captures represents a group of captured byte strings for a single match.
997+ /// `Captures` represents a group of captured strings for a single match.
874998///
875999/// The 0th capture always corresponds to the entire match. Each subsequent
8761000/// index corresponds to the next capture group in the regex. If a capture
0 commit comments