@@ -16,6 +16,7 @@ use crate::{
1616 strategy:: { self , Strategy } ,
1717 wrappers,
1818 } ,
19+ nfa:: thompson:: WhichCaptures ,
1920 util:: {
2021 captures:: { Captures , GroupInfo } ,
2122 iter,
@@ -2429,6 +2430,7 @@ pub struct Config {
24292430 utf8_empty : Option < bool > ,
24302431 autopre : Option < bool > ,
24312432 pre : Option < Option < Prefilter > > ,
2433+ which_captures : Option < WhichCaptures > ,
24322434 nfa_size_limit : Option < Option < usize > > ,
24332435 onepass_size_limit : Option < Option < usize > > ,
24342436 hybrid_cache_capacity : Option < usize > ,
@@ -2619,6 +2621,75 @@ impl Config {
26192621 Config { pre : Some ( pre) , ..self }
26202622 }
26212623
2624+ /// Configures what kinds of groups are compiled as "capturing" in the
2625+ /// underlying regex engine.
2626+ ///
2627+ /// This is set to [`WhichCaptures::All`] by default. Callers may wish to
2628+ /// use [`WhichCaptures::Implicit`] in cases where one wants avoid the
2629+ /// overhead of capture states for explicit groups.
2630+ ///
2631+ /// Note that another approach to avoiding the overhead of capture groups
2632+ /// is by using non-capturing groups in the regex pattern. That is,
2633+ /// `(?:a)` instead of `(a)`. This option is useful when you can't control
2634+ /// the concrete syntax but know that you don't need the underlying capture
2635+ /// states. For example, using `WhichCaptures::Implicit` will behave as if
2636+ /// all explicit capturing groups in the pattern were non-capturing.
2637+ ///
2638+ /// Setting this to `WhichCaptures::None` may result in an error when
2639+ /// building a meta regex.
2640+ ///
2641+ /// # Example
2642+ ///
2643+ /// This example demonstrates how the results of capture groups can change
2644+ /// based on this option. First we show the default (all capture groups in
2645+ /// the pattern are capturing):
2646+ ///
2647+ /// ```
2648+ /// use regex_automata::{meta::Regex, Match, Span};
2649+ ///
2650+ /// let re = Regex::new(r"foo([0-9]+)bar")?;
2651+ /// let hay = "foo123bar";
2652+ ///
2653+ /// let mut caps = re.create_captures();
2654+ /// re.captures(hay, &mut caps);
2655+ /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
2656+ /// assert_eq!(Some(Span::from(3..6)), caps.get_group(1));
2657+ ///
2658+ /// Ok::<(), Box<dyn std::error::Error>>(())
2659+ /// ```
2660+ ///
2661+ /// And now we show the behavior when we only include implicit capture
2662+ /// groups. In this case, we can only find the overall match span, but the
2663+ /// spans of any other explicit group don't exist because they are treated
2664+ /// as non-capturing. (In effect, when `WhichCaptures::Implicit` is used,
2665+ /// there is no real point in using [`Regex::captures`] since it will never
2666+ /// be able to report more information than [`Regex::find`].)
2667+ ///
2668+ /// ```
2669+ /// use regex_automata::{
2670+ /// meta::Regex,
2671+ /// nfa::thompson::WhichCaptures,
2672+ /// Match,
2673+ /// Span,
2674+ /// };
2675+ ///
2676+ /// let re = Regex::builder()
2677+ /// .configure(Regex::config().which_captures(WhichCaptures::Implicit))
2678+ /// .build(r"foo([0-9]+)bar")?;
2679+ /// let hay = "foo123bar";
2680+ ///
2681+ /// let mut caps = re.create_captures();
2682+ /// re.captures(hay, &mut caps);
2683+ /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
2684+ /// assert_eq!(None, caps.get_group(1));
2685+ ///
2686+ /// Ok::<(), Box<dyn std::error::Error>>(())
2687+ /// ```
2688+ pub fn which_captures ( mut self , which_captures : WhichCaptures ) -> Config {
2689+ self . which_captures = Some ( which_captures) ;
2690+ self
2691+ }
2692+
26222693 /// Sets the size limit, in bytes, to enforce on the construction of every
26232694 /// NFA build by the meta regex engine.
26242695 ///
@@ -2983,6 +3054,14 @@ impl Config {
29833054 self . pre . as_ref ( ) . unwrap_or ( & None ) . as_ref ( )
29843055 }
29853056
3057+ /// Returns the capture configuration, as set by
3058+ /// [`Config::which_captures`].
3059+ ///
3060+ /// If it was not explicitly set, then a default value is returned.
3061+ pub fn get_which_captures ( & self ) -> WhichCaptures {
3062+ self . which_captures . unwrap_or ( WhichCaptures :: All )
3063+ }
3064+
29863065 /// Returns NFA size limit, as set by [`Config::nfa_size_limit`].
29873066 ///
29883067 /// If it was not explicitly set, then a default value is returned.
@@ -3126,6 +3205,7 @@ impl Config {
31263205 utf8_empty : o. utf8_empty . or ( self . utf8_empty ) ,
31273206 autopre : o. autopre . or ( self . autopre ) ,
31283207 pre : o. pre . or_else ( || self . pre . clone ( ) ) ,
3208+ which_captures : o. which_captures . or ( self . which_captures ) ,
31293209 nfa_size_limit : o. nfa_size_limit . or ( self . nfa_size_limit ) ,
31303210 onepass_size_limit : o
31313211 . onepass_size_limit
0 commit comments