@@ -45,6 +45,8 @@ Pcre::Pcre(const std::string& pattern_)
4545 &errptr, &erroffset, NULL );
4646
4747 m_pce = pcre_study (m_pc, pcre_study_opt, &errptr);
48+
49+ pcre_fullinfo (m_pc, m_pce, PCRE_INFO_CAPTURECOUNT, &m_capture_count);
4850}
4951
5052
@@ -63,64 +65,79 @@ Pcre::~Pcre() {
6365 }
6466}
6567
68+ static bool do_match (
69+ pcre *pc,
70+ pcre_extra *pce,
71+ int pcre_capture_count,
72+ const char *s,
73+ size_t n,
74+ RegexMatch *m,
75+ ssize_t max_groups,
76+ size_t offset)
77+ {
78+ if (m == nullptr ) {
79+ max_groups = 0 ;
80+ }
6681
67- std::list<RegexMatch> Pcre::searchAll (const std::string& s) const {
68- const char *subject = s.c_str ();
69- const std::string tmpString = std::string (s.c_str (), s.size ());
70- int ovector[OVECCOUNT];
71- int rc, i, offset = 0 ;
72- std::list<RegexMatch> retList;
73-
74- do {
75- rc = pcre_exec (m_pc, m_pce, subject,
76- s.size (), offset, 0 , ovector, OVECCOUNT);
77-
78- for (i = 0 ; i < rc; i++) {
79- size_t start = ovector[2 *i];
80- size_t end = ovector[2 *i+1 ];
81- size_t len = end - start;
82- if (end > s.size ()) {
83- rc = 0 ;
84- break ;
85- }
86- std::string match = std::string (tmpString, start, len);
87- offset = start + len;
88- retList.push_front (RegexMatch (match, start));
82+ // "+1" is required for full match (aka group 0)
83+ int ovecsize = (pcre_capture_count+1 ) * 3 ;
84+ int ovector[ovecsize];
85+ int ret = pcre_exec (pc, pce, s, n, offset, 0 , ovector, ovecsize);
8986
90- if (len == 0 ) {
91- rc = 0 ;
92- break ;
93- }
87+ if (ret > 0 ) {
88+ if (max_groups < 0 ) {
89+ max_groups = ret;
9490 }
95- } while (rc > 0 );
96-
97- return retList;
98- }
99-
10091
101- int Pcre::search (const std::string& s, RegexMatch *match) const {
102- int ovector[OVECCOUNT];
103- int ret = pcre_exec (m_pc, m_pce, s.c_str (),
104- s.size (), 0 , 0 , ovector, OVECCOUNT) > 0 ;
92+ if (max_groups > 0 ) {
93+ size_t ngroups = std::min<size_t >(max_groups, ret);
94+ RegexMatch::MatchGroupContainer groups;
95+ groups.reserve (ngroups);
96+ for (size_t i = 0 ; i < ngroups; i++) {
97+ size_t start = ovector[2 *i];
98+ size_t end = ovector[2 *i+1 ];
99+ std::string group (s + start, end - start);
105100
106- if (ret > 0 ) {
107- *match = RegexMatch (
108- std::string (s, ovector[ret-1 ], ovector[ret] - ovector[ret-1 ]),
109- 0 );
101+ groups.push_back (MatchGroup{start, std::move (group)});
102+ }
103+ *m = RegexMatch (std::move (groups));
104+ }
105+ return true ;
110106 }
107+ return false ;
111108
112- return ret;
113109}
114110
111+ std::vector<RegexMatch> Pcre::searchAll (const std::string& s, bool overlapping) const {
112+ std::vector<RegexMatch> res;
113+ size_t offset = 0 ;
114+
115+ while (1 ) {
116+ RegexMatch m;
117+ bool match = do_match (m_pc, m_pce, m_capture_count, s.data (), s.size (), &m, -1 , offset);
118+ if (!match) break ;
119+
120+ if (overlapping) {
121+ // start just after the beginning of the last match
122+ offset = m.group (0 ).offset + 1 ;
123+ } else {
124+ // start just at the end of the last match
125+ offset = m.group (0 ).offset + m.group (0 ).string .size ();
126+ if (offset == m.group (0 ).offset ) {
127+ // empty match - advance by one to not match empty string repeatedly
128+ offset++;
129+ }
130+ }
131+ res.push_back (std::move (m));
132+ }
133+ return res;
134+ }
115135
116- int Pcre::search (const std::string& s) const {
117- int ovector[OVECCOUNT];
118- return pcre_exec (m_pc, m_pce, s.c_str (),
119- s.size (), 0 , 0 , ovector, OVECCOUNT) > 0 ;
136+ bool Pcre::search (const std::string &s, RegexMatch *m, ssize_t max_groups) const {
137+ return do_match (m_pc, m_pce, m_capture_count, s.data (), s.size (), m, max_groups, 0 );
120138}
121139
122140
123141} // namespace backend
124142} // namespace regex
125143} // namespace modsecurity
126-
0 commit comments