Skip to content

Commit eb6e0e0

Browse files
committed
wip: trying to generalize and make fast enough
1 parent 45b4da3 commit eb6e0e0

File tree

1 file changed

+88
-39
lines changed

1 file changed

+88
-39
lines changed

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 88 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,32 @@ extension Processor {
6464
}
6565
currentPosition = next
6666
return true
67+
case (.eager, _, nil):
68+
guard let (next, savePointRange) = input.runEagerNOrMoreQuantify(
69+
payload,
70+
asciiBitset: asciiBitset,
71+
at: currentPosition,
72+
limitedBy: end
73+
) else {
74+
assert(nil == input.runGeneralQuantify(
75+
payload,
76+
asciiBitset: asciiBitset,
77+
at: currentPosition,
78+
limitedBy: end))
79+
signalFailure()
80+
return false
81+
}
82+
assert((next, savePointRange) == input.runGeneralQuantify(
83+
payload,
84+
asciiBitset: asciiBitset,
85+
at: currentPosition,
86+
limitedBy: end)!)
87+
if let savePointRange {
88+
savePoints.append(makeQuantifiedSavePoint(
89+
savePointRange, isScalarSemantics: payload.isScalarSemantics))
90+
}
91+
currentPosition = next
92+
return true
6793
case (_, 0, 1):
6894
// FIXME: Is this correct for lazy zero-or-one?
6995
let (next, save) = input.runZeroOrOneQuantify(
@@ -84,19 +110,9 @@ extension Processor {
84110
at: currentPosition,
85111
limitedBy: end
86112
) else {
87-
assert(nil == input.runGeneralQuantify(
88-
payload,
89-
asciiBitset: asciiBitset,
90-
at: currentPosition,
91-
limitedBy: end))
92113
signalFailure()
93114
return false
94115
}
95-
assert((next, savePointRange) == input.runGeneralQuantify(
96-
payload,
97-
asciiBitset: asciiBitset,
98-
at: currentPosition,
99-
limitedBy: end)!)
100116
if let savePointRange {
101117
savePoints.append(makeQuantifiedSavePoint(
102118
savePointRange, isScalarSemantics: payload.isScalarSemantics))
@@ -237,29 +253,42 @@ extension String {
237253
assert(payload.quantKind == .eager
238254
&& payload.minTrips == 0
239255
&& payload.maxExtraTrips == nil)
240-
return doRunEagerZeroOrMoreQuantify(
256+
guard let res = _runEagerNOrMoreQuantify(
241257
payload,
258+
minTrips: 0,
242259
asciiBitset: asciiBitset,
243260
at: currentPosition,
244-
limitedBy: end)
261+
limitedBy: end
262+
) else {
263+
fatalError("Unreachable: zero-or-more always succeeds")
264+
}
265+
266+
return res
245267
}
246268

247-
// NOTE: inline-always so-as to inline into one-or-more call, which makes a
248-
// significant performance difference
269+
/// Specialized n-or-more eager quantification interpreter
270+
///
271+
/// NOTE: `@inline(__always)` makes a huge performance difference for the
/// zero-or-more case.
249272
@inline(__always)
250-
private func doRunEagerZeroOrMoreQuantify(
273+
fileprivate func _runEagerNOrMoreQuantify(
251274
_ payload: QuantifyPayload,
275+
minTrips: UInt64,
252276
asciiBitset: ASCIIBitset?, // Necessary ugliness...
253277
at currentPosition: Index,
254278
limitedBy end: Index
255-
) -> (Index, savePointRange: Range<Index>?) {
279+
) -> (Index, savePointRange: Range<Index>?)? {
280+
assert(payload.quantKind == .eager)
281+
assert(payload.maxExtraTrips == nil)
282+
assert(minTrips == payload.minTrips)
283+
256284
// Create a quantified save point for every part of the input matched up
257285
// to the final position.
258286
var currentPosition = currentPosition
259287
let isScalarSemantics = payload.isScalarSemantics
260-
let rangeStart = currentPosition
288+
var rangeStart = currentPosition
261289
var rangeEnd = currentPosition
262-
var matchedOnce = false
290+
291+
var numMatches = 0
263292

264293
switch payload.type {
265294
case .asciiBitset:
@@ -273,7 +302,10 @@ extension String {
273302
else {
274303
break
275304
}
276-
matchedOnce = true
305+
numMatches += 1
306+
if numMatches == minTrips {
307+
rangeStart = next
308+
}
277309
rangeEnd = currentPosition
278310
currentPosition = next
279311
assert(currentPosition > rangeEnd)
@@ -290,7 +322,10 @@ extension String {
290322
else {
291323
break
292324
}
293-
matchedOnce = true
325+
numMatches += 1
326+
if numMatches == minTrips {
327+
rangeStart = next
328+
}
294329
rangeEnd = currentPosition
295330
currentPosition = next
296331
assert(currentPosition > rangeEnd)
@@ -310,7 +345,10 @@ extension String {
310345
else {
311346
break
312347
}
313-
matchedOnce = true
348+
numMatches += 1
349+
if numMatches == minTrips {
350+
rangeStart = next
351+
}
314352
rangeEnd = currentPosition
315353
currentPosition = next
316354
assert(currentPosition > rangeEnd)
@@ -326,17 +364,25 @@ extension String {
326364
else {
327365
break
328366
}
329-
matchedOnce = true
367+
numMatches += 1
368+
if numMatches == minTrips {
369+
rangeStart = next
370+
}
330371
rangeEnd = currentPosition
331372
currentPosition = next
332373
assert(currentPosition > rangeEnd)
333374
}
334375
}
335376

336-
guard matchedOnce else {
377+
guard numMatches >= minTrips else {
378+
return nil
379+
}
380+
381+
guard numMatches > minTrips else {
337382
// Matched exactly `minTrips` times (possibly zero): there are no optional
// iterations to backtrack over, so no point saved
338383
return (currentPosition, nil)
339384
}
385+
assert(rangeStart <= rangeEnd)
340386

341387
// NOTE: We can't assert that rangeEnd trails currentPosition by one
342388
// position, because newline-sequence in scalar semantic mode still
@@ -346,36 +392,39 @@ extension String {
346392
}
347393

348394
/// Specialized quantify instruction interpreter for eager n-or-more
/// quantification (`minTrips` required matches, no upper bound)
349-
fileprivate func runEagerOneOrMoreQuantify(
395+
fileprivate func runEagerNOrMoreQuantify(
350396
_ payload: QuantifyPayload,
351397
asciiBitset: ASCIIBitset?, // Necessary ugliness...
352398
at currentPosition: Index,
353399
limitedBy end: Index
354400
) -> (Index, savePointRange: Range<Index>?)? {
355401
assert(payload.quantKind == .eager
356-
&& payload.minTrips == 1
357402
&& payload.maxExtraTrips == nil)
358403

359-
// Match at least once
360-
//
361-
// NOTE: Due to newline-sequence in scalar-semantic mode advancing two
362-
// positions, we can't just have doRunEagerZeroOrMoreQuantify return the
363-
// range-end and advance the range-start ourselves. Instead, we do one
364-
// call before looping.
365-
guard let next = doQuantifyMatch(
404+
return _runEagerNOrMoreQuantify(
366405
payload,
406+
minTrips: payload.minTrips,
367407
asciiBitset: asciiBitset,
368408
at: currentPosition,
369-
limitedBy: end
370-
) else {
371-
return nil
372-
}
409+
limitedBy: end)
410+
}
411+
412+
/// Specialized quantify instruction interpreter for `+`
413+
fileprivate func runEagerOneOrMoreQuantify(
414+
_ payload: QuantifyPayload,
415+
asciiBitset: ASCIIBitset?, // Necessary ugliness...
416+
at currentPosition: Index,
417+
limitedBy end: Index
418+
) -> (Index, savePointRange: Range<Index>?)? {
419+
assert(payload.quantKind == .eager
420+
&& payload.minTrips == 1
421+
&& payload.maxExtraTrips == nil)
373422

374-
// Run `a+` as `aa*`
375-
return doRunEagerZeroOrMoreQuantify(
423+
return _runEagerNOrMoreQuantify(
376424
payload,
425+
minTrips: 1,
377426
asciiBitset: asciiBitset,
378-
at: next,
427+
at: currentPosition,
379428
limitedBy: end)
380429
}
381430

0 commit comments

Comments
 (0)