@@ -183,13 +183,155 @@ extension BidirectionalCollection {
183183
184184// MARK: Regex algorithms
185185
/// The matches of a regex within a piece of text.
///
/// The first match is searched for once, during initialization, and stored
/// as `startIndex`. When the text contains no match, `startIndex` is `.end`.
@available(SwiftStdlib 5.7, *)
struct RegexMatchesCollection<Output> {
  /// The text that is searched for matches.
  let input: Substring

  /// The regex used for every search.
  let regex: Regex<Output>

  /// The position of the first match, or `.end` if `input` has no match.
  let startIndex: Index

  /// Creates the collection and eagerly looks up the first match of `regex`
  /// in `base`.
  init(base: Substring, regex: Regex<Output>) {
    input = base
    self.regex = regex
    if let first = base.firstMatch(of: regex) {
      startIndex = .match(first)
    } else {
      startIndex = .end
    }
  }
}
198+
@available(SwiftStdlib 5.7, *)
extension RegexMatchesCollection: Sequence {
  /// Computes where the search for the match following `match` should begin.
  ///
  /// - Returns: The upper bound of `match` when it is non-empty. For an empty
  ///   match, the position one element past it (one character or one Unicode
  ///   scalar, depending on the regex's semantic level), or `nil` when the
  ///   empty match lies at the end of `input`.
  fileprivate func searchIndex(after match: Regex<Output>.Match) -> String.Index? {
    let matchedRange = match.range
    guard matchedRange.isEmpty else {
      return matchedRange.upperBound
    }

    // Searching again from the position of an empty match would find the
    // same match, so step forward by one element. At the end of `input`
    // there is nothing left to step over.
    guard matchedRange.lowerBound != input.endIndex else {
      return nil
    }

    switch regex.initialOptions.semanticLevel {
    case .graphemeCluster:
      return input.index(after: matchedRange.upperBound)
    case .unicodeScalar:
      return input.unicodeScalars.index(after: matchedRange.upperBound)
    }
  }

  /// Produces the matches of a `RegexMatchesCollection` one at a time.
  struct Iterator: IteratorProtocol {
    let base: RegexMatchesCollection

    // The collection has already found its first match (stored in its
    // `startIndex`), so the first call to `next()` hands that match out
    // without searching. Every later call searches from `nextStart`.
    var initialIteration = true
    var nextStart: String.Index?

    init(_ matches: RegexMatchesCollection) {
      base = matches
      // `nextStart` stays `nil` when there is no first match at all.
      if let firstMatch = matches.startIndex.match {
        nextStart = matches.searchIndex(after: firstMatch)
      }
    }

    mutating func next() -> Regex<Output>.Match? {
      // First call: return the match computed by the collection's initializer.
      if initialIteration {
        initialIteration = false
        return base.startIndex.match
      }

      // A `nil` start position means the iteration is finished.
      guard let start = nextStart else {
        return nil
      }

      // Search the remainder of the input; a failed or throwing search ends
      // the iteration.
      guard let found = try? base.regex.firstMatch(in: base.input[start...]) else {
        nextStart = nil
        return nil
      }
      nextStart = base.searchIndex(after: found)
      return found
    }
  }

  func makeIterator() -> Iterator {
    return Iterator(self)
  }
}
259+
@available(SwiftStdlib 5.7, *)
extension RegexMatchesCollection: Collection {
  /// A position in the collection: either a concrete match or the
  /// past-the-end position.
  enum Index: Comparable {
    case match(Regex<Output>.Match)
    case end

    /// The match stored at this position, or `nil` for `.end`.
    var match: Regex<Output>.Match? {
      if case .match(let found) = self {
        return found
      }
      return nil
    }

    /// Two match positions are equal when their matched ranges are equal;
    /// `.end` is equal only to `.end`.
    static func == (lhs: Self, rhs: Self) -> Bool {
      switch (lhs, rhs) {
      case (.end, .end):
        return true
      case let (.match(left), .match(right)):
        return left.range == right.range
      case (.match, .end), (.end, .match):
        return false
      }
    }

    /// Orders match positions by their matched range; every match position
    /// precedes `.end`.
    static func < (lhs: Self, rhs: Self) -> Bool {
      switch (lhs, rhs) {
      case (.end, .match), (.end, .end):
        return false
      case (.match, .end):
        return true
      case let (.match(left), .match(right)):
        // Compare lower bounds first and fall back to upper bounds, so an
        // empty range `i..<i` sorts before a non-empty range `i..<j` that
        // starts at the same point. As of 2022-05-30, `Regex` does not
        // return matches of this kind, but that is one behavior under
        // discussion for regexes like /a*|b/ when matched against "b".
        let (leftRange, rightRange) = (left.range, right.range)
        if leftRange.lowerBound != rightRange.lowerBound {
          return leftRange.lowerBound < rightRange.lowerBound
        }
        return leftRange.upperBound < rightRange.upperBound
      }
    }
  }

  /// The past-the-end position.
  var endIndex: Index {
    return .end
  }

  /// Returns the position of the match that follows the one at `i`, or
  /// `.end` when no further match is found. Traps when `i` is `.end`.
  func index(after i: Index) -> Index {
    switch i {
    case .end:
      fatalError(" Can't advance past the 'endIndex' of a match collection. ")
    case .match(let currentMatch):
      // Both steps may come up empty: there may be nowhere left to search
      // from, or the search may fail or throw.
      if let start = searchIndex(after: currentMatch),
         let following = try? regex.firstMatch(in: input[start...]) {
        return .match(following)
      }
      return .end
    }
  }

  /// Returns the match stored at `position`. Traps when `position` is `.end`.
  subscript(position: Index) -> Regex<Output>.Match {
    switch position {
    case .match(let found):
      return found
    case .end:
      fatalError(" Can't subscript the 'endIndex' of a match collection. ")
    }
  }
}
327+
186328extension BidirectionalCollection where SubSequence == Substring {
/// Returns a collection of the matches of `regex` within this collection.
@available(SwiftStdlib 5.7, *)
@_disfavoredOverload
func _matches<R: RegexComponent>(
  of regex: R
) -> RegexMatchesCollection<R.RegexOutput> {
  // Search over the full range of this collection, viewed as a `Substring`.
  let searchSpace = self[...]
  return RegexMatchesCollection(base: searchSpace, regex: regex.regex)
}
194336
195337 @available ( SwiftStdlib 5 . 7 , * )
@@ -207,30 +349,6 @@ extension BidirectionalCollection where SubSequence == Substring {
public func matches<Output>(
  of r: some RegexComponent<Output>
) -> [Regex<Output>.Match] {
  // Collect every element of the match collection into an array.
  let allMatches = _matches(of: r)
  return Array(allMatches)
}
235-
236354}
0 commit comments