Skip to content

Commit 15706a4

Browse files
committed
wip
1 parent 1687315 commit 15706a4

File tree

1 file changed

+51
-39
lines changed

1 file changed

+51
-39
lines changed

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
@_implementationOnly import _RegexParser
12
private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset
23

34
extension Processor {
@@ -100,45 +101,56 @@ extension Processor {
100101
}
101102

102103
extension String {
103-
fileprivate func doQuantifyMatch(
104-
_ payload: QuantifyPayload,
105-
asciiBitset: ASCIIBitset?, // Necessary ugliness...
104+
fileprivate func matchQuantifiedASCIIBitset(
105+
_ asciiBitset: ASCIIBitset,
106106
at currentPosition: Index,
107-
limitedBy end: Index
108-
) -> Index? {
109-
let isScalarSemantics = payload.isScalarSemantics
107+
limitedBy end: Index,
108+
minMatches: UInt64,
109+
maxMatches: UInt64,
110+
quantificationKind: AST.Quantification.Kind,
111+
isScalarSemantics: Bool
112+
) -> (next: Index, savePointRange: Range<Index>?)? {
113+
// Create a quantified save point for every part of the input
114+
// (after minMatches) matched up to the final position.
115+
var currentPosition = currentPosition
116+
var rangeStart = currentPosition
117+
var rangeEnd = currentPosition
110118

111-
switch payload.type {
112-
case .asciiBitset:
113-
assert(asciiBitset != nil, "Invariant: needs to be passed in")
114-
return matchASCIIBitset(
115-
asciiBitset!,
116-
at: currentPosition,
117-
limitedBy: end,
118-
isScalarSemantics: isScalarSemantics)
119-
case .asciiChar:
120-
return matchScalar(
121-
UnicodeScalar.init(_value: UInt32(payload.asciiChar)),
122-
at: currentPosition,
123-
limitedBy: end,
124-
boundaryCheck: !isScalarSemantics,
125-
isCaseInsensitive: false)
126-
case .builtin:
127-
// We only emit .quantify if it consumes a single character
128-
return matchBuiltinCC(
129-
payload.builtin,
130-
at: currentPosition,
131-
limitedBy: end,
132-
isInverted: payload.builtinIsInverted,
133-
isStrictASCII: payload.builtinIsStrict,
134-
isScalarSemantics: isScalarSemantics)
135-
case .any:
136-
return matchRegexDot(
119+
var numMatches = 0
120+
121+
while numMatches < maxMatches {
122+
guard let next = matchASCIIBitset(
123+
asciiBitset,
137124
at: currentPosition,
138125
limitedBy: end,
139-
anyMatchesNewline: payload.anyMatchesNewline,
140126
isScalarSemantics: isScalarSemantics)
127+
else {
128+
break
129+
}
130+
numMatches &+= 1
131+
if numMatches == minMatches {
132+
rangeStart = next
133+
}
134+
rangeEnd = currentPosition
135+
currentPosition = next
136+
assert(currentPosition > rangeEnd)
141137
}
138+
139+
guard numMatches >= minMatches else {
140+
return nil
141+
}
142+
143+
guard quantificationKind == .eager && numMatches > minMatches else {
144+
// Not eager, or nothing matched beyond minMatches: no save point
145+
return (currentPosition, nil)
146+
}
147+
assert(rangeStart <= rangeEnd)
148+
149+
// NOTE: We can't assert that rangeEnd trails currentPosition by one
150+
// position, because newline-sequence in scalar semantic mode still
151+
// matches two scalars
152+
153+
return (currentPosition, rangeStart..<rangeEnd)
142154
}
143155

144156
/// Generic quantify instruction interpreter
@@ -209,8 +221,8 @@ extension String {
209221
assert(minTrips == payload.minTrips)
210222
assert(minTrips + (payload.maxExtraTrips ?? (UInt64.max - minTrips)) == maxTrips)
211223

212-
// Create a quantified save point for every part of the input matched up
213-
// to the final position.
224+
// Create a quantified save point for every part of the input
225+
// (after minTrips) matched up to the final position.
214226
var currentPosition = currentPosition
215227
let isScalarSemantics = payload.isScalarSemantics
216228
var rangeStart = currentPosition
@@ -230,7 +242,7 @@ extension String {
230242
else {
231243
break
232244
}
233-
numMatches += 1
245+
numMatches &+= 1
234246
if numMatches == minTrips {
235247
rangeStart = next
236248
}
@@ -250,7 +262,7 @@ extension String {
250262
else {
251263
break
252264
}
253-
numMatches += 1
265+
numMatches &+= 1
254266
if numMatches == minTrips {
255267
rangeStart = next
256268
}
@@ -273,7 +285,7 @@ extension String {
273285
else {
274286
break
275287
}
276-
numMatches += 1
288+
numMatches &+= 1
277289
if numMatches == minTrips {
278290
rangeStart = next
279291
}
@@ -292,7 +304,7 @@ extension String {
292304
else {
293305
break
294306
}
295-
numMatches += 1
307+
numMatches &+= 1
296308
if numMatches == minTrips {
297309
rangeStart = next
298310
}

0 commit comments

Comments
 (0)