1717#include < iostream>
1818#include < fstream>
1919#include < string>
20- #include < list>
2120
2221
2322#include " src/regex/backend/pcre.h"
@@ -46,6 +45,8 @@ Pcre::Pcre(const std::string& pattern_)
4645 &errptr, &erroffset, NULL );
4746
4847 m_pce = pcre_study (m_pc, pcre_study_opt, &errptr);
48+
49+ pcre_fullinfo (m_pc, m_pce, PCRE_INFO_CAPTURECOUNT, &m_capture_count);
4950}
5051
5152
@@ -64,60 +65,76 @@ Pcre::~Pcre() {
6465 }
6566}
6667
68+ static bool do_match (
69+ pcre *pc,
70+ pcre_extra *pce,
71+ int pcre_capture_count,
72+ const char *s,
73+ size_t n,
74+ RegexMatch *m,
75+ ssize_t max_groups,
76+ size_t offset)
77+ {
78+ if (m == nullptr ) {
79+ max_groups = 0 ;
80+ }
6781
68- std::list<RegexMatch> Pcre::searchAll (const std::string& s) const {
69- const char *subject = s.c_str ();
70- const std::string tmpString = std::string (s.c_str (), s.size ());
71- int ovector[OVECCOUNT];
72- int rc, i, offset = 0 ;
73- std::list<RegexMatch> retList;
74-
75- do {
76- rc = pcre_exec (m_pc, m_pce, subject,
77- s.size (), offset, 0 , ovector, OVECCOUNT);
78-
79- for (i = 0 ; i < rc; i++) {
80- size_t start = ovector[2 *i];
81- size_t end = ovector[2 *i+1 ];
82- size_t len = end - start;
83- if (end > s.size ()) {
84- rc = 0 ;
85- break ;
86- }
87- std::string match = std::string (tmpString, start, len);
88- offset = start + len;
89- retList.push_front (RegexMatch (match, start));
90- }
82+ // "+1" is required for full match (aka group 0)
83+ int ovecsize = (pcre_capture_count+1 ) * 3 ;
84+ int ovector[ovecsize];
85+ int ret = pcre_exec (pc, pce, s, n, offset, 0 , ovector, ovecsize);
9186
92- offset = ovector[ 1 ]; // end
93- if (offset == ovector[ 0 ] ) { // start == end (size == 0)
94- offset++ ;
87+ if (ret > 0 ) {
88+ if (max_groups < 0 ) {
89+ max_groups = ret ;
9590 }
96- } while (rc > 0 );
97-
98- return retList;
99- }
100-
10191
102- int Pcre::search (const std::string& s, RegexMatch *match) const {
103- int ovector[OVECCOUNT];
104- int ret = pcre_exec (m_pc, m_pce, s.c_str (),
105- s.size (), 0 , 0 , ovector, OVECCOUNT) > 0 ;
92+ if (max_groups > 0 ) {
93+ size_t ngroups = std::min<size_t >(max_groups, ret);
94+ RegexMatch::MatchGroupContainer groups;
95+ groups.reserve (ngroups);
96+ for (size_t i = 0 ; i < ngroups; i++) {
97+ size_t start = ovector[2 *i];
98+ size_t end = ovector[2 *i+1 ];
99+ std::string group (s + start, end - start);
106100
107- if (ret > 0 ) {
108- *match = RegexMatch (
109- std::string (s, ovector[ret-1 ], ovector[ret] - ovector[ret-1 ]),
110- 0 );
101+ groups.push_back (MatchGroup{start, std::move (group)});
102+ }
103+ *m = RegexMatch (std::move (groups));
104+ }
105+ return true ;
111106 }
107+ return false ;
112108
113- return ret;
114109}
115110
111+ std::vector<RegexMatch> Pcre::searchAll (const std::string& s, bool overlapping) const {
112+ std::vector<RegexMatch> res;
113+ size_t offset = 0 ;
114+
115+ while (1 ) {
116+ RegexMatch m;
117+ bool match = do_match (m_pc, m_pce, m_capture_count, s.data (), s.size (), &m, -1 , offset);
118+ if (!match) break ;
119+
120+ if (overlapping) {
121+ // start just after the beginning of the last match
122+ offset = m.group (0 ).offset + 1 ;
123+ } else {
124+ // start just at the end of the last match
125+ offset = m.group (0 ).offset + m.group (0 ).string .size ();
126+ if (offset == m.group (0 ).offset ) {
127+ // empty match - advance by one to not match empty string repeatedly
128+ offset++;
129+ }
130+ }
131+ res.push_back (std::move (m));
132+ }
133+ return res;
134+ }
116135
117- int Pcre::search (const std::string& s) const {
118- int ovector[OVECCOUNT];
119- return pcre_exec (m_pc, m_pce, s.c_str (),
120- s.size (), 0 , 0 , ovector, OVECCOUNT) > 0 ;
136+ bool Pcre::search (const std::string &s, RegexMatch *m, ssize_t max_groups) const {
137+ return do_match (m_pc, m_pce, m_capture_count, s.data (), s.size (), m, max_groups, 0 );
121138}
122139
123140#endif
0 commit comments