Skip to content

Commit c2b93ab

Browse files
committed
Speed up quantification fast paths by unswitching the loop
1 parent 4e742b4 commit c2b93ab

File tree

3 files changed

+162
-55
lines changed

3 files changed

+162
-55
lines changed

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,9 +391,8 @@ extension DSLTree.CustomCharacterClass.Member {
391391

392392
return { input, bounds in
393393
let curIdx = bounds.lowerBound
394-
let nextIndex = isCharacterSemantic
395-
? input.index(after: curIdx)
396-
: input.unicodeScalars.index(after: curIdx)
394+
let nextIndex = input.index(
395+
after: curIdx, isScalarSemantics: !isCharacterSemantic)
397396

398397
// Under grapheme semantics, we compare based on single NFC scalars. If
399398
// such a character is not single scalar under NFC, the match fails. In
@@ -603,9 +602,9 @@ extension AST.Atom.CharacterProperty {
603602
if p(input, bounds) != nil { return nil }
604603

605604
// TODO: bounds check
606-
return opts.semanticLevel == .graphemeCluster
607-
? input.index(after: bounds.lowerBound)
608-
: input.unicodeScalars.index(after: bounds.lowerBound)
605+
return input.index(
606+
after: bounds.lowerBound,
607+
isScalarSemantics: opts.semanticLevel == .unicodeScalar)
609608
}
610609
}
611610

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 154 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,49 @@
1+
private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset
2+
13
extension Processor {
2-
func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? {
4+
/// Placeholder for an ASCII-bitset matching entry point.
///
/// The body traps unconditionally via `fatalError()`; the register argument
/// is ignored.
func _doASCIIBitsetMatch(_: AsciiBitsetRegister) -> Input.Index? {
  fatalError()
}
9+
}
10+
11+
12+
extension String {
  /// Returns the position immediately after `idx`, stepping by one Unicode
  /// scalar when `isScalarSemantics` is `true` and by one `Character`
  /// otherwise.
  ///
  /// - Parameters:
  ///   - idx: The index to advance from.
  ///   - isScalarSemantics: Selects the `unicodeScalars` view for the step.
  /// - Returns: The index following `idx` in the selected view.
  func index(after idx: Index, isScalarSemantics: Bool) -> Index {
    isScalarSemantics
      ? unicodeScalars.index(after: idx)
      : index(after: idx)
  }
}
21+
22+
23+
extension Processor {
24+
25+
/// Runs a `.quantify` instruction by dispatching to the interpreter
/// specialized for its quantification kind and trip counts.
///
/// - Parameter payload: The quantification to execute.
/// - Returns: `false` for the unsupported `.reluctant` kind; `true` after the
///   zero-or-more and zero-or-one interpreters (which do not report
///   failure); otherwise whatever the one-or-more or general interpreter
///   returns.
internal mutating func runQuantify(_ payload: QuantifyPayload) -> Bool {
  // Every branch returns directly, so no intermediate `matched` local is
  // needed.
  switch (payload.quantKind, payload.minTrips, payload.maxExtraTrips) {
  case (.reluctant, _, _):
    assertionFailure(".reluctant is not supported by .quantify")
    // TODO: this was pre-refactoring behavior, should we fatal error
    // instead?
    return false
  case (.eager, 0, nil):
    runEagerZeroOrMoreQuantify(payload)
    return true
  case (.eager, 1, nil):
    return runEagerOneOrMoreQuantify(payload)
  case (_, 0, 1):
    runZeroOrOneQuantify(payload)
    return true
  default:
    return runGeneralQuantify(payload)
  }
}
45+
46+
private func doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? {
347
let isScalarSemantics = payload.isScalarSemantics
448

549
switch payload.type {
@@ -31,10 +75,8 @@ extension Processor {
3175
guard currentPosition < end else { return nil }
3276

3377
if payload.anyMatchesNewline {
34-
if isScalarSemantics {
35-
return input.unicodeScalars.index(after: currentPosition)
36-
}
37-
return input.index(after: currentPosition)
78+
return input.index(
79+
after: currentPosition, isScalarSemantics: isScalarSemantics)
3880
}
3981

4082
return input.matchAnyNonNewline(
@@ -47,14 +89,14 @@ extension Processor {
4789
/// Generic quantify instruction interpreter
4890
/// - Handles .eager and .possessive
4991
/// - Handles arbitrary minTrips and maxExtraTrips
50-
mutating func runQuantify(_ payload: QuantifyPayload) -> Bool {
92+
private mutating func runGeneralQuantify(_ payload: QuantifyPayload) -> Bool {
5193
assert(payload.quantKind != .reluctant)
5294

5395
var trips = 0
5496
var maxExtraTrips = payload.maxExtraTrips
5597

5698
while trips < payload.minTrips {
57-
guard let next = _doQuantifyMatch(payload) else {
99+
guard let next = doQuantifyMatch(payload) else {
58100
signalFailure()
59101
return false
60102
}
@@ -67,7 +109,7 @@ extension Processor {
67109
return true
68110
}
69111

70-
guard let next = _doQuantifyMatch(payload) else {
112+
guard let next = doQuantifyMatch(payload) else {
71113
return true
72114
}
73115
maxExtraTrips = maxExtraTrips.map { $0 - 1 }
@@ -81,7 +123,7 @@ extension Processor {
81123
while true {
82124
if maxExtraTrips == 0 { break }
83125

84-
guard let next = _doQuantifyMatch(payload) else {
126+
guard let next = doQuantifyMatch(payload) else {
85127
break
86128
}
87129
maxExtraTrips = maxExtraTrips.map({$0 - 1})
@@ -100,67 +142,147 @@ extension Processor {
100142
}
101143

102144
/// Specialized quantify instruction interpreter for `*`, always succeeds
103-
mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) {
145+
private mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) {
  // Only valid for an eager quantifier with no minimum and no upper bound.
  assert(payload.quantKind == .eager)
  assert(payload.minTrips == 0)
  assert(payload.maxExtraTrips == nil)

  // The helper reports whether anything matched; that result is
  // deliberately discarded here.
  _ = doRunEagerZeroOrMoreQuantify(payload)
}
109151

110-
// NOTE: So-as to inline into one-or-more call, which makes a significant
111-
// performance difference
152+
// Returns whether it matched at least once
153+
//
154+
// NOTE: inline-always so-as to inline into one-or-more call, which makes a
155+
// significant performance difference
112156
@inline(__always)
113-
mutating func _doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) {
114-
guard let next = _doQuantifyMatch(payload) else {
115-
// Consumed no input, no point saved
116-
return
117-
}
118-
157+
private mutating func doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool {
  // Consume as many matches as possible from `currentPosition`, then record
  // a single quantified save point covering every position matched up to
  // (but not including) the final one.
  let scalarMode = payload.isScalarSemantics
  let rangeStart = currentPosition
  var rangeEnd = currentPosition
  var sawMatch = false

  // One dedicated loop per payload type, so the type dispatch happens once
  // rather than on every iteration.
  switch payload.type {
  case .asciiBitset:
    let bitset = registers[payload.bitset]
    while let next = input.matchASCIIBitset(
      bitset,
      at: currentPosition,
      limitedBy: end,
      isScalarSemantics: scalarMode
    ) {
      sawMatch = true
      rangeEnd = currentPosition
      currentPosition = next
      assert(currentPosition > rangeEnd)
    }

  case .asciiChar:
    let asciiScalar = UnicodeScalar(_value: UInt32(payload.asciiChar))
    while let next = input.matchScalar(
      asciiScalar,
      at: currentPosition,
      limitedBy: end,
      boundaryCheck: !scalarMode,
      isCaseInsensitive: false
    ) {
      sawMatch = true
      rangeEnd = currentPosition
      currentPosition = next
      assert(currentPosition > rangeEnd)
    }

  case .builtin:
    let builtin = payload.builtin
    let isInverted = payload.builtinIsInverted
    let isStrictASCII = payload.builtinIsStrict
    while let next = input.matchBuiltinCC(
      builtin,
      at: currentPosition,
      limitedBy: end,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: scalarMode
    ) {
      sawMatch = true
      rangeEnd = currentPosition
      currentPosition = next
      assert(currentPosition > rangeEnd)
    }

  case .any:
    while currentPosition < end {
      let candidate: String.Index?
      if payload.anyMatchesNewline {
        candidate = input.index(
          after: currentPosition, isScalarSemantics: scalarMode)
      } else {
        candidate = input.matchAnyNonNewline(
          at: currentPosition,
          limitedBy: end,
          isScalarSemantics: scalarMode)
      }

      guard let next = candidate else { break }
      sawMatch = true
      rangeEnd = currentPosition
      currentPosition = next
      assert(currentPosition > rangeEnd)
    }
  }

  // Consumed no input, no point saved
  guard sawMatch else { return false }

  // NOTE: We can't assert that rangeEnd trails currentPosition by one
  // position, because newline-sequence in scalar semantic mode still
  // matches two scalars

  let savePoint = makeQuantifiedSavePoint(
    rangeStart..<rangeEnd, isScalarSemantics: payload.isScalarSemantics)
  savePoints.append(savePoint)
  return true
}
132254

133255
/// Specialized quantify instruction interpreter for `+`
134-
mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool {
256+
private mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool {
  // Only valid for an eager quantifier with exactly one required trip and
  // no upper bound.
  assert(payload.quantKind == .eager
         && payload.minTrips == 1
         && payload.maxExtraTrips == nil)

  // Match at least once; without that first match the quantifier fails.
  guard let next = doQuantifyMatch(payload) else {
    signalFailure()
    return false
  }

  // Run `a+` as `aa*`
  currentPosition = next
  // The mandatory match already succeeded, so whether the zero-or-more tail
  // matched anything does not affect the result. Discard it explicitly to
  // avoid an unused-result warning.
  _ = doRunEagerZeroOrMoreQuantify(payload)
  return true
}
150272

151273
/// Specialized quantify instruction interpreter for ?
152-
mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool {
274+
private mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) {
  // Only valid when no trip is required and at most one is allowed.
  assert(payload.minTrips == 0)
  assert(payload.maxExtraTrips == 1)

  guard let idx = doQuantifyMatch(payload) else {
    // matched zero times
    return
  }

  // Unless possessive, keep a save point for the zero-match alternative
  // that resumes at the following instruction.
  let isPossessive = payload.quantKind == .possessive
  if !isPossessive {
    // Save the zero match
    savePoints.append(makeSavePoint(resumingAt: currentPC+1))
  }
  currentPosition = idx
}
166288
}

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -515,26 +515,12 @@ extension Processor {
515515
controller.step()
516516
}
517517
case .quantify:
518-
let quantPayload = payload.quantify
519-
let matched: Bool
520-
switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.maxExtraTrips) {
521-
case (.reluctant, _, _):
522-
assertionFailure(".reluctant is not supported by .quantify")
523-
return
524-
case (.eager, 0, nil):
525-
runEagerZeroOrMoreQuantify(quantPayload)
526-
matched = true
527-
case (.eager, 1, nil):
528-
matched = runEagerOneOrMoreQuantify(quantPayload)
529-
case (_, 0, 1):
530-
matched = runZeroOrOneQuantify(quantPayload)
531-
default:
532-
matched = runQuantify(quantPayload)
533-
}
534-
if matched {
518+
if runQuantify(payload.quantify) {
535519
controller.step()
536520
}
537521

522+
523+
538524
case .consumeBy:
539525
let reg = payload.consumer
540526
let consumer = registers[reg]

0 commit comments

Comments
 (0)