@@ -44,55 +44,71 @@ Re2::Re2(const std::string& pattern)
4444{
4545}
4646
47- std::list<RegexMatch> Re2::searchAll (const std::string& s) const {
48- std::list<RegexMatch> retList;
49-
50- re2::StringPiece subject (s);
51-
52- size_t offset = 0 ;
53- while (offset <= s.size ()) {
54- int ngroups = re.NumberOfCapturingGroups () + 1 ;
55- re2::StringPiece submatches[ngroups];
47+ static bool do_match (
48+ const RE2 &re,
49+ const char *s,
50+ size_t n,
51+ RegexMatch *m,
52+ ssize_t max_groups,
53+ size_t offset)
54+ {
55+ if (m == nullptr ) {
56+ max_groups = 0 ;
57+ }
5658
57- if (!re.Match (subject, offset, s.size (), RE2::UNANCHORED,
58- &submatches[0 ], ngroups)) {
59- break ;
59+ // "+1" is required for full match (aka group 0)
60+ size_t ngroups = re.NumberOfCapturingGroups () + 1 ;
61+ if (max_groups >= 0 && max_groups < ngroups) {
62+ ngroups = max_groups;
63+ }
64+ re2::StringPiece submatches[ngroups];
65+
66+ if (re.Match (re2::StringPiece (s, n), offset, n, RE2::UNANCHORED,
67+ &submatches[0 ], ngroups)) {
68+ if (ngroups != 0 ) {
69+ RegexMatch::MatchGroupContainer groups;
70+ groups.reserve (ngroups);
71+ for (size_t i = 0 ; i < ngroups; i++) {
72+ size_t start = submatches[i].data () - s;
73+ std::string group = submatches[i].as_string ();
74+ groups.push_back (MatchGroup{start, std::move (group)});
75+ }
76+ *m = RegexMatch (std::move (groups));
6077 }
78+ return true ;
79+ }
80+ return false ;
81+ }
6182
62- for (int i = 0 ; i < ngroups; i++) {
63- // N.B. StringPiece::as_string returns value, not reference
64- auto match_string = submatches[i].as_string ();
65- auto start = &submatches[i][0 ] - &subject[0 ];
66- retList.push_front (RegexMatch (std::move (match_string), start));
67- }
83+ std::vector<RegexMatch> Re2::searchAll (const std::string& s, bool overlapping) const {
84+ std::vector<RegexMatch> res;
85+ size_t offset = 0 ;
6886
69- offset = (&submatches[0 ][0 ] - &subject[0 ]) + submatches[0 ].length ();
70- if (submatches[0 ].size () == 0 ) {
71- offset++;
87+ while (1 ) {
88+ RegexMatch m;
89+ bool match = do_match (re, s.data (), s.size (), &m, -1 , offset);
90+ if (!match) break ;
91+
92+ if (overlapping) {
93+ // start just after the beginning of the last match
94+ offset = m.group (0 ).offset + 1 ;
95+ } else {
96+ // start just at the end of the last match
97+ offset = m.group (0 ).offset + m.group (0 ).string .size ();
98+ if (offset == m.group (0 ).offset ) {
99+ // empty match - advance by one to not match empty string repeatedly
100+ offset++;
101+ }
72102 }
103+ res.push_back (std::move (m));
73104 }
74-
75- return retList;
105+ return res;
76106}
77107
78- int Re2::search (const std::string& s, RegexMatch *match) const {
79- re2::StringPiece subject (s);
80- re2::StringPiece submatches[1 ];
81- if (re.Match (subject, 0 , s.size (), RE2::UNANCHORED, &submatches[0 ], 1 )) {
82- // N.B. StringPiece::as_string returns value, not reference
83- auto match_string = submatches[0 ].as_string ();
84- auto start = &submatches[0 ][0 ] - &subject[0 ];
85- *match = RegexMatch (std::move (match_string), start);
86- return 1 ;
87- } else {
88- return 0 ;
89- }
108+ bool Re2::search (const std::string &s, RegexMatch *m, ssize_t max_groups) const {
109+ return do_match (re, s.data (), s.size (), m, max_groups, 0 );
90110}
91111
92- int Re2::search (const std::string& s) const {
93- re2::StringPiece subject (s);
94- return re.Match (subject, 0 , s.size (), RE2::UNANCHORED, NULL , 0 );
95- }
96112#endif
97113
98114} // namespace backend
0 commit comments