Skip to content

Commit 89be882

Browse files
authored
Merge pull request #875 from Catfish-Man/faster-filenames
Optimize matchesFilenamePattern (rdar://164182287)
2 parents 1d602f2 + ebb206b commit 89be882

File tree

4 files changed

+66
-47
lines changed

4 files changed

+66
-47
lines changed

Sources/SWBCore/Settings/RecursiveSearchPathResolver.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ public final class RecursiveSearchPathResolver: Sendable {
202202
result.append(Path("."))
203203
} else if sourcePath.isRoot {
204204
result.append(Path(String(path.str[path.str.utf8.index(after: path.str.utf8.startIndex)...])))
205-
} else if path.str.hasPrefix(sourcePath.str) && Path.pathSeparatorsUTF8.contains(path.str.utf8[path.str.utf8.index(path.str.utf8.startIndex, offsetBy: sourcePath.str.utf8.count)]) {
205+
} else if path.str.hasPrefix(sourcePath.str) && Path.isUTF8PathSeparator(path.str.utf8[path.str.utf8.index(path.str.utf8.startIndex, offsetBy: sourcePath.str.utf8.count)]) {
206206
// FIXME: Use dropFirst() once available everywhere.
207207
result.append(Path(String(path.str[path.str.utf8.index(path.str.utf8.startIndex, offsetBy: sourcePath.str.utf8.count + 1)...])))
208208
} else {

Sources/SWBUtil/Path.swift

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,18 +69,45 @@ public struct Path: Serializable, Sendable {
6969
/// The system path separator.
7070
#if os(Windows)
7171
public static let pathSeparator = Character("\\")
72-
public static let pathSeparatorUTF8 = UInt8(ascii: "\\")
73-
public static let pathSeparatorsUTF8 = Set([UInt8(ascii: "\\"), UInt8(ascii: "/")])
72+
@inline(__always) public static var pathSeparatorUTF8: UInt8 { UInt8(ascii: "\\") }
7473
public static let pathEnvironmentSeparator = Character(";")
75-
public static let pathSeparators = Set("\\/")
74+
/// Reports whether a single UTF-8 code unit is a path separator (variant compiled under `#if os(Windows)`).
///
/// - Parameters:
///   - char: The UTF-8 code unit to classify.
///   - separators: An optional explicit set of separator characters. When `nil` (the default), `char` is
///     compared against `pathSeparatorUTF8` and ASCII `/`.
/// - Returns: `true` if `char` is considered a path separator.
@inline(__always) public static func isUTF8PathSeparator(_ char: UInt8, separators: (some Collection<Character>)? = ([Character]?).none) -> Bool {
75+
guard let separators else {
76+
return char == pathSeparatorUTF8 || char == UInt8(ascii: "/")
77+
}
78+
// This is a bit inefficient, but separators should always be nil outside of tests
// The byte is decoded into a one-code-unit string and that string is searched for within `separators`.
// NOTE(review): a lone byte >= 0x80 is not valid UTF-8, so it presumably decodes to U+FFFD and cannot
// match a custom separator — confirm this is acceptable for test-only use.
79+
return separators.contains(String(decoding: CollectionOfOne(char), as: UTF8.self))
80+
}
81+
/// Finds the position of the first path separator in `str` (variant compiled under `#if os(Windows)`).
///
/// - Parameters:
///   - str: The string to search.
///   - separators: An optional explicit set of separator characters. When `nil`, the UTF-8 view of `str`
///     is scanned using `Path.isUTF8PathSeparator`; otherwise `str` is scanned `Character` by `Character`
///     for membership in `separators`.
/// - Returns: The index of the first separator, or `nil` if `str` contains none.
@inline(__always) public static func firstPathSeparatorIndex(in str: some StringProtocol, separators: (some Collection<Character>)?) -> String.Index? {
82+
guard let separators else {
83+
// In this branch `separators` is the (nil) optional parameter, so the default byte comparison is used.
return str.utf8.firstIndex(where: { Path.isUTF8PathSeparator($0, separators: separators) })
84+
}
85+
return str.firstIndex(where: { separators.contains($0) })
86+
}
7687
#else
7788
public static let pathSeparator = Character("/")
78-
public static let pathSeparatorUTF8 = UInt8(ascii: "/")
79-
public static let pathSeparatorsUTF8 = Set([UInt8(ascii: "/")])
89+
@inline(__always) public static var pathSeparatorUTF8: UInt8 { UInt8(ascii: "/") }
8090
public static let pathEnvironmentSeparator = Character(":")
81-
public static let pathSeparators = Set([Character("/")])
91+
/// Reports whether a single UTF-8 code unit is a path separator (variant compiled in the `#else` branch of `#if os(Windows)`).
///
/// - Parameters:
///   - char: The UTF-8 code unit to classify.
///   - separators: An optional explicit set of separator characters. When `nil` (the default), `char` is
///     compared against `pathSeparatorUTF8` only.
/// - Returns: `true` if `char` is considered a path separator.
@inline(__always) public static func isUTF8PathSeparator(_ char: UInt8, separators: (some Collection<Character>)? = ([Character]?).none) -> Bool {
92+
guard let separators else {
93+
return char == pathSeparatorUTF8
94+
}
95+
// This is a bit inefficient, but separators should always be nil outside of tests
// The byte is decoded into a one-code-unit string and that string is searched for within `separators`.
// NOTE(review): a lone byte >= 0x80 is not valid UTF-8, so it presumably decodes to U+FFFD and cannot
// match a custom separator — confirm this is acceptable for test-only use.
96+
return separators.contains(String(decoding: CollectionOfOne(char), as: UTF8.self))
97+
}
98+
/// Finds the position of the first path separator in `str` (variant compiled in the `#else` branch of `#if os(Windows)`).
///
/// - Parameters:
///   - str: The string to search.
///   - separators: An optional explicit set of separator characters. When `nil`, the UTF-8 view of `str`
///     is searched for `pathSeparatorUTF8`; otherwise `str` is scanned `Character` by `Character` for
///     membership in `separators`.
/// - Returns: The index of the first separator, or `nil` if `str` contains none.
@inline(__always) public static func firstPathSeparatorIndex(in str: some StringProtocol, separators: (some Collection<Character>)?) -> String.Index? {
99+
guard let separators else {
100+
// NOTE(review): `index(of:)` is the deprecated spelling of `firstIndex(of:)`; consider switching to
// `firstIndex(of:)` to match the `firstIndex(where:)` call used in the Windows variant.
return str.utf8.index(of: pathSeparatorUTF8)
101+
}
102+
return str.firstIndex(where: { separators.contains($0) })
103+
}
82104
#endif
83105

106+
/// Reports whether `char` is a path separator by classifying its first UTF-8 code unit.
///
/// - Parameters:
///   - char: The character to classify.
///   - separators: An optional explicit set of separator characters, forwarded to `isUTF8PathSeparator`.
/// - Returns: `false` if `char` has no UTF-8 code units; otherwise the result of
///   `isUTF8PathSeparator` for the first code unit.
@inline(__always) public static func isPathSeparator(_ char: Character, separators: (some Collection<Character>)?) -> Bool {
107+
// NOTE(review): only the leading byte is examined, so a multi-code-unit `Character` (for example a
// separator followed by a combining mark) is classified by its first byte alone — confirm this is intended.
guard let c = char.utf8.first else { return false }
108+
return isUTF8PathSeparator(c, separators: separators)
109+
}
110+
84111
/// The system path separator, as a string.
85112
public static let pathSeparatorString = String(pathSeparator)
86113

@@ -717,9 +744,10 @@ public struct Path: Serializable, Sendable {
717744
var numComponents = 0
718745
var isInPathComponent = false
719746
var nextCharacterIsEscaped = false
720-
for idx in pattern.indices {
747+
for byte in pattern.utf8 {
721748
// Skip over path separators, unless they're escaped.
722-
if pattern[idx] == Path.pathSeparator {
749+
//TODO: should this (and other similar uses) be Path.isUTF8PathSeparator(byte) instead for Windows?
750+
if byte == Path.pathSeparatorUTF8 {
723751
if !nextCharacterIsEscaped {
724752
isInPathComponent = false
725753
}
@@ -736,7 +764,7 @@ public struct Path: Serializable, Sendable {
736764
nextCharacterIsEscaped = false
737765
}
738766
else {
739-
nextCharacterIsEscaped = (pattern[idx] == Character("\\"))
767+
nextCharacterIsEscaped = (byte == UInt8(ascii: "\\"))
740768
}
741769
}
742770
return numComponents
@@ -746,19 +774,20 @@ public struct Path: Serializable, Sendable {
746774
var numPathComponentsInPath = 0
747775
var isInPathComponent = false
748776
var firstIdx: String.Index?
749-
for idx in self.str.indices.reversed() {
777+
let utf8Str = self.str.utf8
778+
for idx in utf8Str.indices.reversed() {
750779
// Skip over path separators. We ignore backslashes here, since paths don't have escape characters.
751-
if self.str[idx] == Path.pathSeparator {
780+
if utf8Str[idx] == Path.pathSeparatorUTF8 {
752781
isInPathComponent = false
753782
// If we've found the expected number of path components, then we stop, and record the index of the first character we want to match against.
754783
if numPathComponentsInPath == numPathComponentsInPattern {
755-
if idx != self.str.endIndex {
756-
firstIdx = self.str.index(after: idx)
784+
if idx != utf8Str.endIndex {
785+
firstIdx = utf8Str.index(after: idx)
757786
}
758787
break
759788
}
760789
}
761-
else if idx == self.str.startIndex {
790+
else if idx == utf8Str.startIndex {
762791
// If we didn't encounter a path separator, then the full string is the trailing subpath.
763792
firstIdx = idx
764793
break
@@ -781,7 +810,7 @@ public struct Path: Serializable, Sendable {
781810
}
782811

783812
// Create a string from the first index we found to the end of the path.
784-
let trailingSubpath = String(self.str[first..<self.str.endIndex])
813+
let trailingSubpath = self.str[first..<self.str.endIndex]
785814

786815
// Match the pattern against the requisite number of trailing path components.
787816
do {

Sources/SWBUtil/fnmatch.swift

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,6 @@ private enum RangeStatus {
3636
case error
3737
}
3838

39-
private extension StringProtocol {
40-
@inline(__always)
41-
func firstIndex(matching characters: Set<Character>) -> String.Index? {
42-
if characters.isEmpty {
43-
return nil
44-
}
45-
return firstIndex(where: {characters.contains($0)})
46-
}
47-
}
48-
4939
/// Multi-platform fnmatch implementation. This is intended to closely match the POSIX fnmatch on all platforms, including Windows (though not all options are supported).
5040
///
5141
/// - parameter pattern: The pattern to match. When using the ``FnmatchOptions/pathname`` option, any path representation in the pattern is expected to use the POSIX path separator (`/`) to match with the input, and on Windows, the path separator (`/`) will be matched to either separator in the input string (both `/` and `\` will be matched).
@@ -54,7 +44,7 @@ private extension StringProtocol {
5444
/// - returns: `true` if the pattern matches the input, `false` otherwise.
5545
///
5646
/// - note: On Windows and when using the ``FnmatchOptions/pathname`` option, both separators (`/` and `\`) are recognized (see note on pattern parameter).
57-
public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, options: FnmatchOptions = .default, pathSeparators: Set<Character> = Path.pathSeparators) throws
47+
public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, options: FnmatchOptions = .default, pathSeparators: (some Collection<Character>)? = ([Character]?).none) throws
5848
-> Bool
5949
{
6050
// Use Substrings to avoid String allocations
@@ -76,32 +66,32 @@ public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, op
7666
return false
7767
}
7868
case "?":
79-
guard let _sc = input.first else {
69+
guard let _sc = input.utf8.first else {
8070
return false
8171
}
82-
if options.contains(.pathname) && pathSeparators.contains(_sc) {
72+
if options.contains(.pathname) && Path.isUTF8PathSeparator(_sc, separators: pathSeparators) {
8373
if backtrack() {
8474
return false
8575
}
8676
}
8777
input = input.dropFirst()
8878
case "*":
89-
var p = pattern.first
90-
while pattern.first == "*" {
79+
var p = pattern.utf8.first
80+
while pattern.utf8.first == UInt8(ascii: "*") {
9181
// consume multiple '*' in pattern
9282
pattern = pattern.dropFirst()
93-
p = pattern.first
83+
p = pattern.utf8.first
9484
}
9585
if p == nil {
9686
if options.contains(.pathname) {
9787
// make sure input does not have any more path separators
98-
return input.firstIndex(matching: pathSeparators) == nil ? true : false
88+
return Path.firstPathSeparatorIndex(in: input, separators: pathSeparators) == nil
9989
} else {
10090
return true // pattern matched everything else in input
10191
}
102-
} else if pattern.first == "/" && options.contains(.pathname) {
92+
} else if p == UInt8(ascii: "/") && options.contains(.pathname) {
10393
// We have a '*/' pattern, so the input must contain a path separator to continue.
104-
guard let newInputIndex = input.firstIndex(matching: pathSeparators) else {
94+
guard let newInputIndex = Path.firstPathSeparatorIndex(in: input, separators: pathSeparators) else {
10595
return false
10696
}
10797
input.removeSubrange(..<newInputIndex)
@@ -110,14 +100,14 @@ public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, op
110100
bt_pattern = pattern
111101
bt_input = input
112102
case "[":
113-
guard let _sc = input.first else {
114-
return false
115-
}
116-
if pathSeparators.contains(_sc) && options.contains(.pathname) {
103+
if let first = input.utf8.first, Path.isUTF8PathSeparator(first, separators: pathSeparators) && options.contains(.pathname) {
117104
if backtrack() {
118105
return false
119106
}
120107
}
108+
guard let _sc = input.first else {
109+
return false
110+
}
121111
var new_input = input.dropFirst()
122112
var new_pattern = pattern
123113
switch rangematch(pattern: &new_pattern, input: &new_input, test: _sc, options: options, pathSeparators: pathSeparators) {
@@ -146,7 +136,7 @@ public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, op
146136
continue
147137
} else {
148138
// Windows needs to test for both path separators.
149-
if _pc == "/" && options.contains(.pathname) && pathSeparators.contains(_sc) {
139+
if _pc == "/" && options.contains(.pathname) && Path.isPathSeparator(_sc, separators: pathSeparators) {
150140
continue
151141
}
152142
if backtrack() {
@@ -177,15 +167,15 @@ public func fnmatch(pattern: some StringProtocol, input: some StringProtocol, op
177167
}
178168

179169
@inline(__always)
180-
private func rangematch(pattern: inout Substring, input: inout Substring, test: Character, options: FnmatchOptions, pathSeparators: Set<Character>) -> RangeStatus {
170+
private func rangematch(pattern: inout Substring, input: inout Substring, test: Character, options: FnmatchOptions, pathSeparators: (some Collection<Character>)? = ([Character]?).none) -> RangeStatus {
181171
var test = test
182172

183-
if !pattern.contains("]") {
173+
if !pattern.utf8.contains(UInt8(ascii: "]")) {
184174
// unmatched '[' test as literal '['
185175
return "[" == test ? .match : .noMatch
186176
}
187177

188-
let negate = pattern.first == "!"
178+
let negate = pattern.utf8.first == UInt8(ascii: "!")
189179
if negate {
190180
pattern = pattern.dropFirst()
191181
}
@@ -198,13 +188,13 @@ private func rangematch(pattern: inout Substring, input: inout Substring, test:
198188
if c == "]" {
199189
break
200190
}
201-
if options.contains(.pathname) && pathSeparators.contains(c) {
191+
if options.contains(.pathname) && Path.isPathSeparator(c, separators: pathSeparators) {
202192
return .noMatch
203193
}
204194
if options.contains(.caseInsensitive) {
205195
c = Character(c.lowercased())
206196
}
207-
if pattern.first == "-" {
197+
if pattern.utf8.first == UInt8(ascii: "-") {
208198
let subPattern = pattern.dropFirst()
209199
if var c2 = subPattern.first {
210200
if c2 != "]" {

Tests/SWBUtilTests/FnmatchTests.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ import SWBUtil
247247

248248
@Test(arguments: [true, false])
249249
func pathnameMatch(isWindows: Bool) throws {
250-
let separators = isWindows ? Set("\\/") : Set([Character("/")])
250+
let separators = isWindows ? "\\/" : "/"
251251

252252
try assertFnmatch(pattern: "x?y", input: "x/y", separators: separators)
253253
try assertFnmatch(pattern: "x?y", input: "x/y", shouldMatch: false, options: [.pathname], separators: separators)
@@ -272,7 +272,7 @@ import SWBUtil
272272
}
273273

274274
func assertFnmatch(
275-
pattern: String, input: String, shouldMatch: Bool = true, options: FnmatchOptions = .default, separators: Set<Character> = Path.pathSeparators, sourceLocation: SourceLocation = #_sourceLocation) throws {
275+
pattern: String, input: String, shouldMatch: Bool = true, options: FnmatchOptions = .default, separators: (some Collection<Character>)? = ([Character]?).none, sourceLocation: SourceLocation = #_sourceLocation) throws {
276276
let comment = Comment(stringLiteral: "\(pattern) \(shouldMatch ? "should" : "should not") match \(input)")
277277
let result = try fnmatch(pattern: pattern, input: input, options: options, pathSeparators: separators)
278278
shouldMatch ? #expect(result, comment, sourceLocation: sourceLocation) : #expect(!result, comment, sourceLocation: sourceLocation)

0 commit comments

Comments
 (0)