From 094089b0717e7fb7405d675b66e8312bfb5d0237 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 25 Sep 2025 13:03:19 -0500 Subject: [PATCH 1/8] Implement auto-possessification This optimization recognizes quantifications followed by an atom that can't be matched at the start of the quantification, and converts them to possessive, eliminating any backtracking at that position. This change includes an accessor for the first "required" atom (which will be used in other optimizations), and converts DSL.QuantificationKind to a struct to support the possessification changes. --- .../ByteCodeGen+DSLList.swift | 18 +- Sources/_StringProcessing/ByteCodeGen.swift | 10 +- Sources/_StringProcessing/Compiler.swift | 4 +- .../_StringProcessing/LiteralPrinter.swift | 15 +- Sources/_StringProcessing/Regex/DSLList.swift | 295 ++++++++++++++++++ Sources/_StringProcessing/Regex/DSLTree.swift | 175 ++++++++++- Tests/RegexTests/MatchTests.swift | 7 +- Tests/RegexTests/OptimizationTests.swift | 78 +++++ 8 files changed, 564 insertions(+), 38 deletions(-) create mode 100644 Tests/RegexTests/OptimizationTests.swift diff --git a/Sources/_StringProcessing/ByteCodeGen+DSLList.swift b/Sources/_StringProcessing/ByteCodeGen+DSLList.swift index c61c37fdf..330018878 100644 --- a/Sources/_StringProcessing/ByteCodeGen+DSLList.swift +++ b/Sources/_StringProcessing/ByteCodeGen+DSLList.swift @@ -12,7 +12,7 @@ internal import _RegexParser extension Compiler.ByteCodeGen { - mutating func emitRoot(_ root: DSLList) throws -> MEProgram { + mutating func emitRoot(_ root: inout DSLList) throws -> MEProgram { // If the whole regex is a matcher, then the whole-match value // is the constructed value. Denote that the current value // register is the processor's value output. @@ -22,7 +22,11 @@ extension Compiler.ByteCodeGen { default: break } - + + if optimizationsEnabled { + root.autoPossessify() + } + var list = root.nodes[...] try emitNode(&list) @@ -352,15 +356,7 @@ fileprivate extension Compiler.ByteCodeGen { _ kind: DSLTree.QuantificationKind, _ list: inout ArraySlice ) throws { - let updatedKind: AST.Quantification.Kind - switch kind { - case .explicit(let kind): - updatedKind = kind.ast - case .syntax(let kind): - updatedKind = kind.ast.applying(options) - case .default: - updatedKind = options.defaultQuantificationKind - } + let updatedKind = kind.applying(options: options) let (low, high) = amount.bounds guard let low = low else { diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index d6ec4d716..24c94da11 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -506,15 +506,7 @@ extension Compiler.ByteCodeGen { _ kind: DSLTree.QuantificationKind, _ child: DSLTree.Node ) throws { - let updatedKind: AST.Quantification.Kind - switch kind { - case .explicit(let kind): - updatedKind = kind.ast - case .syntax(let kind): - updatedKind = kind.ast.applying(options) - case .default: - updatedKind = options.defaultQuantificationKind - } + let updatedKind = kind.applying(options: options) let (low, high) = amount.bounds guard let low = low else { diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index e2fd2a284..25e6e4cf6 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -47,13 +47,13 @@ class Compiler { __consuming func emitViaList() throws -> MEProgram { // TODO: Handle global options - let dslList = DSLList(tree: tree) + var dslList = DSLList(tree: tree) var codegen = ByteCodeGen( options: options, compileOptions: compileOptions, captureList: tree.captureList) - return try codegen.emitRoot(dslList) + return try codegen.emitRoot(&dslList) } } diff --git a/Sources/_StringProcessing/LiteralPrinter.swift b/Sources/_StringProcessing/LiteralPrinter.swift index fa80f032d..e1dc3fa23 100644 --- a/Sources/_StringProcessing/LiteralPrinter.swift +++ b/Sources/_StringProcessing/LiteralPrinter.swift @@ -224,13 +224,16 @@ extension LiteralPrinter { } mutating func outputQuantificationKind(_ kind: DSLTree.QuantificationKind) { - switch kind { - case .`default`: + guard let astKind = kind.quantificationKind?.ast else { // We can treat this as if the current default had been given explicity. outputQuantificationKind( .explicit(.init(ast: options.defaultQuantificationKind))) - case let .explicit(kind): - switch kind.ast { + return + } + + if kind.isExplicit { + // Explicitly provided modifiers need to match the current option state. + switch astKind { case .eager: output(options.isReluctantByDefault ? "?" : "") case .reluctant: @@ -242,9 +245,9 @@ extension LiteralPrinter { fatalError() #endif } - case let .syntax(kind): + } else { // Syntactically-specified quantification modifiers can stay as-is. - switch kind.ast { + switch astKind { case .eager: output("") case .reluctant: diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index 1bbb0c9cb..772af2efb 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -94,3 +94,298 @@ extension DSLTree { }) } } + + +extension DSLList { + private func skipNode(_ position: inout Int) { + guard position < nodes.count else { + return + } + switch nodes[position] { + case let .orderedChoice(children): + let n = children.count + for _ in 0.. DSLTree.Atom?? { + guard position < nodes.count else { + return nil + } + + switch nodes[position] { + case .atom(let atom): + return switch atom { + case .changeMatchingOptions: + nil + default: + atom + } + + // In a concatenation, the first definitive child provides the answer, + // and then we need to skip past (in some cases at least) the remaining + // concatenation elements. + case .concatenation(let children): + var result: DSLTree.Atom?? = nil + var i = 0 + while i < children.count { + i += 1 + position += 1 + if let r = _requiredAtomImpl(&position) { + result = r + break + } + } + + for _ in i.. DSLTree.Atom? { + var position = 0 + return _requiredAtomImpl(&position) ?? nil + } + + + internal mutating func autoPossessifyNextQuantification(_ position: inout Int) -> (Int, DSLTree.Atom)? { + guard position < nodes.count else { + return nil + } + + switch nodes[position] { + case .quantification(let amount, _, _): + position += 1 + switch _requiredAtomImpl(&position) { + case .some(let atom?): + return (position - 1, atom) + case .none, .some(.none): + return nil + } + + case .concatenation(let children): + // If we find a valid quantification among this concatenation's components, + // we must look for a required atom in the sibling. If a definitive result + // is not found, pop up the recursion stack to find a sibling at a higher + // level. + var foundQuantification: (Int, DSLTree.Atom)? = nil + var foundNextAtom: DSLTree.Atom? = nil + var i = 0 + position += 1 + while i < children.count { + i += 1 + if let result = autoPossessifyNextQuantification(&position) { + foundQuantification = result + break + } + } + + while i < children.count { + i += 1 + position += 1 + if let result = _requiredAtomImpl(&position) { + foundNextAtom = result + break + } + } + + for _ in i.. Bool { + switch (self, other) { + case (.char(let a), .char(let b)): + return a != b + case (.scalar(let a), .scalar(let b)): + return a != b + case (.characterClass(let a), .characterClass(let b)): + return a.excludes(b) + + default: + return false + } + } +} + +extension DSLTree.Atom.CharacterClass { + func excludes(_ other: Self) -> Bool { + if other == .anyGrapheme || other == .anyUnicodeScalar { + return false + } + + return switch self { + case .anyGrapheme, .anyUnicodeScalar: + false + + case .digit: + switch other { + case .whitespace, .horizontalWhitespace, .verticalWhitespace, .newlineSequence, + .notWord, .notDigit: true + default: false + } + case .notDigit: + other == .digit + + case .horizontalWhitespace: + switch other { + case .word, .digit, .verticalWhitespace, .newlineSequence, + .notWhitespace, .notHorizontalWhitespace: true + default: false + } + case .notHorizontalWhitespace: + other == .horizontalWhitespace + + case .newlineSequence: + switch other { + case .word, .digit, .horizontalWhitespace, .notNewline: true + default: false + } + case .notNewline: + other == .newlineSequence + + case .whitespace: + switch other { + case .word, .digit, .notWhitespace: true + default: false + } + case .notWhitespace: + other == .whitespace + + case .verticalWhitespace: + switch other { + case .word, .digit, .notWhitespace, .notVerticalWhitespace: true + default: false + } + case .notVerticalWhitespace: + other == .verticalWhitespace + + case .word: + switch other { + case .whitespace, .horizontalWhitespace, .verticalWhitespace, .newlineSequence, + .notWord: true + default: false + } + case .notWord: + other == .word + } + } +} diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 03a563978..a95d40033 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -92,19 +92,38 @@ extension DSLTree { } extension DSLTree { - enum QuantificationKind { + struct QuantificationKind { + var quantificationKind: _AST.QuantificationKind? + var isExplicit: Bool + var canAutoPossessify: Bool? + /// The default quantification kind, as set by options. - case `default` + static var `default`: Self { + .init(quantificationKind: nil, isExplicit: false, canAutoPossessify: nil) + } + /// An explicitly chosen kind, overriding any options. - case explicit(_AST.QuantificationKind) + static func explicit(_ kind: _AST.QuantificationKind) -> Self { + .init(quantificationKind: kind, isExplicit: true, canAutoPossessify: nil) + } + /// A kind set via syntax, which can be affected by options. - case syntax(_AST.QuantificationKind) + static func syntax(_ kind: _AST.QuantificationKind) -> Self { + .init(quantificationKind: kind, isExplicit: false, canAutoPossessify: nil) + } var ast: AST.Quantification.Kind? { - switch self { - case .default: return nil - case .explicit(let kind), .syntax(let kind): - return kind.ast + quantificationKind?.ast + } + + func applying(options: MatchingOptions) -> AST.Quantification.Kind { + guard let kind = quantificationKind?.ast else { + return options.defaultQuantificationKind + } + return if isExplicit { + kind + } else { + kind.applying(options) } } } @@ -889,6 +908,146 @@ extension DSLTree.Node { } } +// MARK: Required first and last atoms + +extension DSLTree.Node { + private func _requiredAtomImpl(forward: Bool) -> DSLTree.Atom?? { + switch self { + case .atom(let atom): + return switch atom { + case .changeMatchingOptions: + nil + default: + atom + } + + // In a concatenation, the first definitive child provides the answer. + case .concatenation(let children): + if forward { + for child in children { + if let result = child._requiredAtomImpl(forward: forward) { + return result + } + } + } else { + for child in children.reversed() { + if let result = child._requiredAtomImpl(forward: forward) { + return result + } + } + } + return nil + + // For a quoted literal, we can look at the first char + // TODO: matching semantics??? + case .quotedLiteral(let str): + return str.first.map(DSLTree.Atom.char) + + // TODO: custom character classes could/should participate here somehow + case .customCharacterClass: + return .some(nil) + + // Trivia/empty have no effect. + case .trivia, .empty: + return nil + + // For alternation and conditional, no required first (this could change + // if we identify the _same_ required first atom across all possibilities). + case .orderedChoice, .conditional: + return .some(nil) + + // Groups (and other parent nodes) defer to the child. + case .nonCapturingGroup(_, let child), .capture(_, _, let child, _), + .ignoreCapturesInTypedOutput(let child), + .limitCaptureNesting(let child): + return child._requiredAtomImpl(forward: forward) + + // A quantification that doesn't require its child to exist can still + // allow a start-only match. (e.g. `/(foo)?^bar/`) + case .quantification(let amount, _, let child): + return amount.requiresAtLeastOne + ? child._requiredAtomImpl(forward: forward) + : .some(nil) + + // Extended behavior isn't known, so we return `false` for safety. + case .consumer, .matcher, .characterPredicate, .absentFunction: + return .some(nil) + } + } + + internal func requiredFirstAtom() -> DSLTree.Atom? { + self._requiredAtomImpl(forward: true) ?? nil + } + + internal func requiredLastAtom() -> DSLTree.Atom? { + self._requiredAtomImpl(forward: false) ?? nil + } +} + + +private func _requiredAtomImpl(_ list: inout ArraySlice) -> DSLTree.Atom?? { + guard let node = list.popFirst() else { + return nil + } + switch node { + case .atom(let atom): + return switch atom { + case .changeMatchingOptions: + nil + default: + atom + } + + // In a concatenation, the first definitive child provides the answer. + case .concatenation(let children): + for _ in 0..) -> DSLTree.Atom? { + _requiredAtomImpl(&list) ?? nil +} + // MARK: AST wrapper types // // These wrapper types are required because even @_spi-marked public APIs can't diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index e36285ae6..78f133806 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -37,12 +37,15 @@ func _roundTripLiteral( return remadeRegex } +// Validate that the given regex compiles to the same instructions whether +// as a tree (original) or a list (new). We need to compile with optimizations +// disabled, since new optimizations are primarily landing in list compilation. func _validateListCompilation( _ regex: Regex ) throws -> Bool { - let treeCompiler = Compiler(tree: regex.program.tree) + let treeCompiler = Compiler(tree: regex.program.tree, compileOptions: .disableOptimizations) let treeProgram = try treeCompiler.emitViaTree() - let listCompiler = Compiler(tree: regex.program.tree) + let listCompiler = Compiler(tree: regex.program.tree, compileOptions: .disableOptimizations) let listProgram = try listCompiler.emitViaList() return treeProgram.instructions == listProgram.instructions } diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift new file mode 100644 index 000000000..69a7d9df1 --- /dev/null +++ b/Tests/RegexTests/OptimizationTests.swift @@ -0,0 +1,78 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import Testing +@testable @_spi(RegexBuilder) import _StringProcessing +@testable import _RegexParser + +@Suite +struct OptimizationTests { + @available(macOS 9999, *) + @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#]) + func requiredFirstAtom(pattern: Regex) throws { + let atom = pattern.root.requiredFirstAtom() + #expect(atom?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") + + let list = DSLList(tree: pattern.program.tree) + var nodes = list.nodes[...] + let atom2 = _StringProcessing.requiredFirstAtom(&nodes) + #expect(atom2?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") + + let atom3 = list.requiredFirstAtom() + #expect(atom3?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") + } + + @available(macOS 9999, *) + @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/[a]/#, #/a?bc/#]) + func noRequiredFirstAtom(pattern: Regex) throws { + let atom = pattern.root.requiredFirstAtom() + #expect(atom == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") + + let list = DSLList(tree: pattern.program.tree) + var nodes = list.nodes[...] + let atom2 = _StringProcessing.requiredFirstAtom(&nodes) + #expect(atom2 == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") + + let atom3 = list.requiredFirstAtom() + #expect(atom3 == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") + } + + @available(macOS 9999, *) + @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/cb?a/#]) + func requiredLastAtom(pattern: Regex) throws { + let atom = pattern.root.requiredLastAtom() + #expect(atom?.literalCharacterValue == "a", "Missing last character atom in '\(pattern._literalPattern!)'") + } + + @available(macOS 9999, *) + @Test(arguments: [#/a?/#, #/a*/#, #/(?:a|b)/#, #/[a]/#, #/abc?/#]) + func noRequiredLastAtom(pattern: Regex) throws { + let atom = pattern.root.requiredLastAtom() + #expect(atom == nil, "Unexpected required last atom in '\(pattern._literalPattern!)'") + } + + @available(macOS 9999, *) + @Test(arguments: [#/(?:a+b|b+a)/#]) //[#/a+b/#, #/a*b/#, #/\d+a/#, #/\w+\s/#, #/(?:a+b|b+a)/#]) + func autoPossessify(pattern: Regex) throws { + var list = DSLList(tree: pattern.program.tree) + var index = 0 + _ = list.autoPossessifyNextQuantification(&index) + print(pattern._literalPattern!) + dump(list) + } + + @available(macOS 9999, *) + @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/(?:a+|b+)/#, #/[a]/#, #/a?a/#]) + func noAutoPossessify(pattern: Regex) throws { + var list = DSLList(tree: pattern.program.tree) + list.autoPossessify() + } +} From 9caf6a90f8b588a920602fdd4543061ee7fb24b7 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 7 Oct 2025 11:58:40 -0500 Subject: [PATCH 2/8] Update auto-possessification tests --- Tests/RegexTests/OptimizationTests.swift | 30 +++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index 69a7d9df1..f2f74d6c6 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -13,8 +13,7 @@ import Testing @testable @_spi(RegexBuilder) import _StringProcessing @testable import _RegexParser -@Suite -struct OptimizationTests { +@Suite struct OptimizationTests { @available(macOS 9999, *) @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#]) func requiredFirstAtom(pattern: Regex) throws { @@ -60,19 +59,34 @@ struct OptimizationTests { } @available(macOS 9999, *) - @Test(arguments: [#/(?:a+b|b+a)/#]) //[#/a+b/#, #/a*b/#, #/\d+a/#, #/\w+\s/#, #/(?:a+b|b+a)/#]) + @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#]) // , #/\d+a/# func autoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) - var index = 0 - _ = list.autoPossessifyNextQuantification(&index) - print(pattern._literalPattern!) - dump(list) + list.autoPossessify() + for node in list.nodes { + switch node { + case .quantification(_, let kind, _): + #expect( + kind.isExplicit && kind.quantificationKind?.ast == .possessive, + "Expected possessification in '\(pattern._literalPattern!)'") + default: break + } + } } @available(macOS 9999, *) - @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/(?:a+|b+)/#, #/[a]/#, #/a?a/#]) + @Test(arguments: [#/a?/#, #/a+a/#, #/(?:a|b)/#, #/(?:a+|b+)/#, #/[a]/#, #/a?a/#]) func noAutoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) list.autoPossessify() + for node in list.nodes { + switch node { + case .quantification(_, let kind, _): + #expect( + kind.quantificationKind?.ast != .possessive, + "Unexpected possessification in '\(pattern._literalPattern!)'") + default: break + } + } } } From c6fad9d0dd61c1c22e9b29bd86b298c294180829 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Oct 2025 11:48:01 -0500 Subject: [PATCH 3/8] Eliminate some no longer relevant tests --- Tests/RegexTests/OptimizationTests.swift | 38 ++++-------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index f2f74d6c6..9a425de75 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -15,47 +15,19 @@ import Testing @Suite struct OptimizationTests { @available(macOS 9999, *) - @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#]) + @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#, #/(?:(?:a+b)+b)/#]) func requiredFirstAtom(pattern: Regex) throws { - let atom = pattern.root.requiredFirstAtom() - #expect(atom?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") - let list = DSLList(tree: pattern.program.tree) - var nodes = list.nodes[...] - let atom2 = _StringProcessing.requiredFirstAtom(&nodes) - #expect(atom2?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") - - let atom3 = list.requiredFirstAtom() - #expect(atom3?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") + let atom = list.requiredFirstAtom() + #expect(atom?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") } @available(macOS 9999, *) @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/[a]/#, #/a?bc/#]) func noRequiredFirstAtom(pattern: Regex) throws { - let atom = pattern.root.requiredFirstAtom() - #expect(atom == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") - let list = DSLList(tree: pattern.program.tree) - var nodes = list.nodes[...] - let atom2 = _StringProcessing.requiredFirstAtom(&nodes) - #expect(atom2 == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") - - let atom3 = list.requiredFirstAtom() - #expect(atom3 == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") - } - - @available(macOS 9999, *) - @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/cb?a/#]) - func requiredLastAtom(pattern: Regex) throws { - let atom = pattern.root.requiredLastAtom() - #expect(atom?.literalCharacterValue == "a", "Missing last character atom in '\(pattern._literalPattern!)'") - } - - @available(macOS 9999, *) - @Test(arguments: [#/a?/#, #/a*/#, #/(?:a|b)/#, #/[a]/#, #/abc?/#]) - func noRequiredLastAtom(pattern: Regex) throws { - let atom = pattern.root.requiredLastAtom() - #expect(atom == nil, "Unexpected required last atom in '\(pattern._literalPattern!)'") + let atom = list.requiredFirstAtom() + #expect(atom == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") } @available(macOS 9999, *) From 469785aa8b0afcbf1f4cfb419a0755ebd37a82c3 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Oct 2025 11:48:20 -0500 Subject: [PATCH 4/8] Support auto-possessification of nested quantifiers --- Sources/_StringProcessing/Regex/DSLList.swift | 9 ++++++++- Tests/RegexTests/OptimizationTests.swift | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index 772af2efb..233fdde8c 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -216,10 +216,17 @@ extension DSLList { switch nodes[position] { case .quantification(let amount, _, _): + let quantPosition = position position += 1 + + // Do a search within this quantification's contents + // FIXME: How to handle an inner quantification surfacing here? + var innerPosition = position + _ = autoPossessifyNextQuantification(&innerPosition) + switch _requiredAtomImpl(&position) { case .some(let atom?): - return (position - 1, atom) + return (quantPosition, atom) case .none, .some(.none): return nil } diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index 9a425de75..83185df2d 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -31,7 +31,7 @@ import Testing } @available(macOS 9999, *) - @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#]) // , #/\d+a/# + @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/(?:(?:a+b)+b)/#]) func autoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) list.autoPossessify() From 9e9978cc921e1d99a8b3555b281ed69bc6da3b67 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Oct 2025 18:26:31 -0500 Subject: [PATCH 5/8] Don't use negative lookaheads for required atom --- Sources/_StringProcessing/Regex/DSLList.swift | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index 233fdde8c..2ab5a38a6 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -180,7 +180,12 @@ extension DSLList { case .orderedChoice, .conditional: return .some(nil) - // Groups (and other parent nodes) defer to the child. + // A negative lookahead rules out the existence of a safe required + // character. + case .nonCapturingGroup(let kind, _) where kind.isNegativeLookahead: + return .some(nil) + + // Other groups (and other parent nodes) defer to the child. case .nonCapturingGroup, .capture, .ignoreCapturesInTypedOutput, .limitCaptureNesting: @@ -208,7 +213,6 @@ extension DSLList { return _requiredAtomImpl(&position) ?? nil } - internal mutating func autoPossessifyNextQuantification(_ position: inout Int) -> (Int, DSLTree.Atom)? { guard position < nodes.count else { return nil From bf9b2a7710cacdde46bced1a5d763c48fb84af89 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Oct 2025 18:28:03 -0500 Subject: [PATCH 6/8] Add character class / character exclusion --- Sources/_StringProcessing/Regex/DSLList.swift | 9 ++++++++- Tests/RegexTests/OptimizationTests.swift | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index 2ab5a38a6..00d1a0841 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -330,7 +330,14 @@ extension DSLTree.Atom { return a != b case (.characterClass(let a), .characterClass(let b)): return a.excludes(b) - + // FIXME: Need to track matching options so we can know if this actually matches + case (.characterClass(let a), .char(let b)), (.char(let b), .characterClass(let a)): + let s = "\(b)" + return a.asRuntimeModel(MatchingOptions()).matches(in: s, at: s.startIndex, limitedBy: s.endIndex) == nil + case (.characterClass(let a), .scalar(let b)), (.scalar(let b), .characterClass(let a)): + let s = "\(b)" + return a.asRuntimeModel(MatchingOptions()).matches(in: s, at: s.startIndex, limitedBy: s.endIndex) == nil + default: return false } diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index 83185df2d..6e64b5f82 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -31,7 +31,7 @@ import Testing } @available(macOS 9999, *) - @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/(?:(?:a+b)+b)/#]) + @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/(?:(?:a+b)+b)/#, #/\d+a/#]) func autoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) list.autoPossessify() From aa0de7861cbac498eb0a33104d56726f77ec1bcb Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 14 Oct 2025 13:44:18 -0500 Subject: [PATCH 7/8] Account for match options during possessification When deciding on exclusion during auto-possessification, awareness of the current matching options is important for correct analysis. For example, /a+A/ can be auto-possessified, but the case insensitive pattern /(?i)a+A/ cannot be. --- .../Optimizations/AutoPossessification.swift | 397 ++++++++++++++++++ Sources/_StringProcessing/Regex/DSLList.swift | 285 +------------ Sources/_StringProcessing/Regex/DSLTree.swift | 8 + Tests/RegexTests/MatchTests.swift | 25 ++ Tests/RegexTests/OptimizationTests.swift | 11 +- 5 files changed, 439 insertions(+), 287 deletions(-) create mode 100644 Sources/_StringProcessing/Optimizations/AutoPossessification.swift diff --git a/Sources/_StringProcessing/Optimizations/AutoPossessification.swift b/Sources/_StringProcessing/Optimizations/AutoPossessification.swift new file mode 100644 index 000000000..03993940f --- /dev/null +++ b/Sources/_StringProcessing/Optimizations/AutoPossessification.swift @@ -0,0 +1,397 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension DSLList { + private func _requiredAtomImpl( + _ position: inout Int, + options: inout MatchingOptions, + allowOptionsChanges: Bool + ) -> DSLTree.Atom?? { + guard position < nodes.count else { + return nil + } + + switch nodes[position] { + case .atom(let atom): + switch atom { + case .changeMatchingOptions(let seq): + // Exit early if an atom changes the matching options. + // TODO: Allow some/all options changes. + if allowOptionsChanges { + options.apply(seq.ast) + return nil + } else { + return .some(nil) + } + default: + return atom + } + + // In a concatenation, the first definitive child provides the answer, + // and then we need to skip past (in some cases at least) the remaining + // concatenation elements. + case .concatenation(let children): + var result: DSLTree.Atom?? = nil + var i = 0 + while i < children.count { + i += 1 + position += 1 + if let r = _requiredAtomImpl(&position, options: &options, allowOptionsChanges: allowOptionsChanges) { + result = r + break + } + } + + for _ in i.. DSLTree.Atom? { + var position = 0 + var options = MatchingOptions() + return _requiredAtomImpl(&position, options: &options, allowOptionsChanges: allowOptionsChanges) ?? nil + } + + internal mutating func autoPossessifyNextQuantification( + _ position: inout Int, + options: inout MatchingOptions + ) -> (Int, DSLTree.Atom)? { + guard position < nodes.count else { + return nil + } + + switch nodes[position] { + case .quantification(_, _, _): + let quantPosition = position + position += 1 + + // Do a search within this quantification's contents + // FIXME: How to handle an inner quantification surfacing here? + var innerPosition = position + _ = autoPossessifyNextQuantification(&innerPosition, options: &options) + + switch _requiredAtomImpl(&position, options: &options, allowOptionsChanges: false) { + case .some(let atom?): + return (quantPosition, atom) + case .none, .some(.none): + return nil + } + + case .concatenation(let children): + // If we find a valid quantification among this concatenation's components, + // we must look for a required atom in the sibling. If a definitive result + // is not found, pop up the recursion stack to find a sibling at a higher + // level. + var foundQuantification: (Int, DSLTree.Atom)? = nil + var foundNextAtom: DSLTree.Atom? = nil + var i = 0 + position += 1 + while i < children.count { + i += 1 + if let result = autoPossessifyNextQuantification(&position, options: &options) { + foundQuantification = result + break + } + } + + while i < children.count { + i += 1 + position += 1 + if let result = _requiredAtomImpl(&position, options: &options, allowOptionsChanges: false) { + foundNextAtom = result + break + } + } + + for _ in i.. Bool { + switch (self, other) { + case (.char(let a), .char(let b)): + // Two characters are mutually exclusive if one does not match against + // the other. + // + // Relevant options: + // - semantic level + // - case insensitivity + + if options.semanticLevel == .graphemeCluster { + // Just call String.match(Character, ...) + let s = String(a) + return nil == s.match( + b, at: s.startIndex, + limitedBy: s.endIndex, + isCaseInsensitive: options.isCaseInsensitive) + } else { + // Call String.matchScalar(Scalar, ...) for each in scalar sequence + let s = String(a) + var i = s.startIndex + var j = b.unicodeScalars.startIndex + while i < s.endIndex { + guard j < b.unicodeScalars.endIndex else { return true } + guard let nextIndex = s.matchScalar(b.unicodeScalars[j], at: i, limitedBy: s.endIndex, boundaryCheck: false, isCaseInsensitive: options.isCaseInsensitive) else { + return true + } + i = nextIndex + b.unicodeScalars.formIndex(after: &j) + } + return false + } + + case (.scalar(let a), .scalar(let b)): + // Two scalars are mutually exclusive if one does not match against + // the other. + // + // Relevant options: + // - case insensitivity + let s = String(a) + return nil == s.matchScalar( + b, at: s.startIndex, + limitedBy: s.endIndex, + boundaryCheck: false, + isCaseInsensitive: options.isCaseInsensitive) + + case (.characterClass(let a), .characterClass(let b)): + // Certain character classes are mutually exclusive of each other. + return a.excludes(b, options: options) + + // For character class and char/scalar, we can test against the class's model. + case (.characterClass(let a), .char(let b)), (.char(let b), .characterClass(let a)): + let s = "\(b)" + return nil == a.asRuntimeModel(options).matches(in: s, at: s.startIndex, limitedBy: s.endIndex) + case (.characterClass(let a), .scalar(let b)), (.scalar(let b), .characterClass(let a)): + let s = "\(b)" + return nil == a.asRuntimeModel(options).matches(in: s, at: s.startIndex, limitedBy: s.endIndex) + + default: + return false + } + } +} + +extension DSLTree.Atom.CharacterClass { + func excludes(_ other: Self, options: MatchingOptions) -> Bool { + if other == .anyGrapheme || other == .anyUnicodeScalar { + return false + } + + return switch self { + case .anyGrapheme, .anyUnicodeScalar: + false + + case .digit: + switch other { + case .whitespace, .horizontalWhitespace, .verticalWhitespace, .newlineSequence, + .notWord, .notDigit: true + default: false + } + case .notDigit: + other == .digit + + case .horizontalWhitespace: + switch other { + case .word, .digit, .verticalWhitespace, .newlineSequence, + .notWhitespace, .notHorizontalWhitespace: true + default: false + } + case .notHorizontalWhitespace: + other == .horizontalWhitespace + + case .newlineSequence: + switch other { + case .word, .digit, .horizontalWhitespace, .notNewline: true + default: false + } + case .notNewline: + other == .newlineSequence + + case .whitespace: + switch other { + case .word, .digit, .notWhitespace: true + default: false + } + case .notWhitespace: + other == .whitespace + + case .verticalWhitespace: + switch other { + case .word, .digit, .notWhitespace, .notVerticalWhitespace: true + default: false + } + case .notVerticalWhitespace: + other == .verticalWhitespace + + case .word: + switch other { + case .whitespace, .horizontalWhitespace, .verticalWhitespace, .newlineSequence, + .notWord: true + default: false + } + case .notWord: + other == .word + } + } +} diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index 00d1a0841..f8d09a953 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -95,9 +95,8 @@ extension DSLTree { } } - extension DSLList { - private func skipNode(_ position: inout Int) { + internal func skipNode(_ position: inout Int) { guard position < nodes.count else { return } @@ -120,290 +119,10 @@ extension DSLList { .limitCaptureNesting, .quantification: position += 1 skipNode(&position) - + case .customCharacterClass, .atom, .quotedLiteral, .matcher, .conditional, .absentFunction, .consumer, .characterPredicate, .trivia, .empty: break } } - - private func _requiredAtomImpl(_ position: inout Int) -> DSLTree.Atom?? { - guard position < nodes.count else { - return nil - } - - switch nodes[position] { - case .atom(let atom): - return switch atom { - case .changeMatchingOptions: - nil - default: - atom - } - - // In a concatenation, the first definitive child provides the answer, - // and then we need to skip past (in some cases at least) the remaining - // concatenation elements. - case .concatenation(let children): - var result: DSLTree.Atom?? = nil - var i = 0 - while i < children.count { - i += 1 - position += 1 - if let r = _requiredAtomImpl(&position) { - result = r - break - } - } - - for _ in i.. DSLTree.Atom? { - var position = 0 - return _requiredAtomImpl(&position) ?? nil - } - - internal mutating func autoPossessifyNextQuantification(_ position: inout Int) -> (Int, DSLTree.Atom)? { - guard position < nodes.count else { - return nil - } - - switch nodes[position] { - case .quantification(let amount, _, _): - let quantPosition = position - position += 1 - - // Do a search within this quantification's contents - // FIXME: How to handle an inner quantification surfacing here? - var innerPosition = position - _ = autoPossessifyNextQuantification(&innerPosition) - - switch _requiredAtomImpl(&position) { - case .some(let atom?): - return (quantPosition, atom) - case .none, .some(.none): - return nil - } - - case .concatenation(let children): - // If we find a valid quantification among this concatenation's components, - // we must look for a required atom in the sibling. If a definitive result - // is not found, pop up the recursion stack to find a sibling at a higher - // level. - var foundQuantification: (Int, DSLTree.Atom)? = nil - var foundNextAtom: DSLTree.Atom? = nil - var i = 0 - position += 1 - while i < children.count { - i += 1 - if let result = autoPossessifyNextQuantification(&position) { - foundQuantification = result - break - } - } - - while i < children.count { - i += 1 - position += 1 - if let result = _requiredAtomImpl(&position) { - foundNextAtom = result - break - } - } - - for _ in i.. Bool { - switch (self, other) { - case (.char(let a), .char(let b)): - return a != b - case (.scalar(let a), .scalar(let b)): - return a != b - case (.characterClass(let a), .characterClass(let b)): - return a.excludes(b) - // FIXME: Need to track matching options so we can know if this actually matches - case (.characterClass(let a), .char(let b)), (.char(let b), .characterClass(let a)): - let s = "\(b)" - return a.asRuntimeModel(MatchingOptions()).matches(in: s, at: s.startIndex, limitedBy: s.endIndex) == nil - case (.characterClass(let a), .scalar(let b)), (.scalar(let b), .characterClass(let a)): - let s = "\(b)" - return a.asRuntimeModel(MatchingOptions()).matches(in: s, at: s.startIndex, limitedBy: s.endIndex) == nil - - default: - return false - } - } -} - -extension DSLTree.Atom.CharacterClass { - func excludes(_ other: Self) -> Bool { - if other == .anyGrapheme || other == .anyUnicodeScalar { - return false - } - - return switch self { - case .anyGrapheme, .anyUnicodeScalar: - false - - case .digit: - switch other { - case .whitespace, .horizontalWhitespace, .verticalWhitespace, .newlineSequence, - .notWord, .notDigit: true - default: false - } - case .notDigit: - other == .digit - - case .horizontalWhitespace: - switch other { - case .word, .digit, .verticalWhitespace, .newlineSequence, - .notWhitespace, .notHorizontalWhitespace: true - default: false - } - case .notHorizontalWhitespace: - other == .horizontalWhitespace - - case .newlineSequence: - switch other { - case .word, .digit, .horizontalWhitespace, .notNewline: true - default: false - } - case .notNewline: - other == .newlineSequence - - case .whitespace: - switch other { - case .word, .digit, .notWhitespace: true - default: false - } - case .notWhitespace: - other == .whitespace - - case .verticalWhitespace: - switch other { - case .word, .digit, .notWhitespace, .notVerticalWhitespace: true - default: false - } - case .notVerticalWhitespace: - other == .verticalWhitespace - - case .word: - switch other { - case .whitespace, .horizontalWhitespace, .verticalWhitespace, .newlineSequence, - .notWord: true - default: false - } - case .notWord: - other == .word - } - } } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index a95d40033..55d8902fa 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -1111,6 +1111,14 @@ extension DSLTree { internal var isNegativeLookahead: Bool { self.ast == .negativeLookahead } + + internal var isChangeMatchingOptions: Bool { + if case let .changeMatchingOptions = ast { + return true + } else { + return false + } + } } @_spi(RegexBuilder) diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 78f133806..a87112b9e 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -737,6 +737,31 @@ extension RegexTests { ("baaaaabc", nil), ("baaaaaaaabc", nil)) + // Auto-possessification tests: + // - case sensitive + firstMatchTests( + "a+A", + ("aaaaA", "aaaaA"), + ("aaaaa", nil), + ("aaAaa", "aaA")) + // - case insensitive + firstMatchTests( + "(?i:a+A)", + ("aaaaA", "aaaaA"), + ("aaaaa", "aaaaa")) + firstMatchTests( + "(?i)a+A", + ("aaaaA", "aaaaA"), + ("aaaaa", "aaaaa")) + firstMatchTests( + "a+(?i:A)", + ("aaaaA", "aaaaA"), + ("aaaaa", "aaaaa")) + firstMatchTests( + "a+(?:(?i)A)", + ("aaaaA", "aaaaA"), + ("aaaaa", "aaaaa")) + // XFAIL'd possessive tests firstMatchTests( "a?+a", diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index 6e64b5f82..40bcd2011 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -18,7 +18,7 @@ import Testing @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#, #/(?:(?:a+b)+b)/#]) func requiredFirstAtom(pattern: Regex) throws { let list = DSLList(tree: pattern.program.tree) - let atom = list.requiredFirstAtom() + let atom = list.requiredFirstAtom(allowOptionsChanges: true) #expect(atom?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") } @@ -26,12 +26,12 @@ import Testing @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/[a]/#, #/a?bc/#]) func noRequiredFirstAtom(pattern: Regex) throws { let list = DSLList(tree: pattern.program.tree) - let atom = list.requiredFirstAtom() + let atom = list.requiredFirstAtom(allowOptionsChanges: true) #expect(atom == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") } @available(macOS 9999, *) - @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/(?:(?:a+b)+b)/#, #/\d+a/#]) + @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/(?:(?:a+b)+b)/#, #/\d+a/#, #/a+A/#]) func autoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) list.autoPossessify() @@ -47,7 +47,10 @@ import Testing } @available(macOS 9999, *) - @Test(arguments: [#/a?/#, #/a+a/#, #/(?:a|b)/#, #/(?:a+|b+)/#, #/[a]/#, #/a?a/#]) + @Test(arguments: [ + #/a?/#, #/a+a/#, #/a+(?:b|c)/#, #/(?:a+|b+)/#, #/[a]/#, #/a?a/#, + #/(?i)a+A/#, #/(?i:a+A)/# // case insensitivity when checking exclusion + ]) func noAutoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) list.autoPossessify() From beaccabad50da273b84cef5bf08d2468072e8ea3 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 15 Oct 2025 19:13:14 -0500 Subject: [PATCH 8/8] Limit auto-possessification to single atom quantifiers --- .../Optimizations/AutoPossessification.swift | 19 ++++++++++--------- Tests/RegexTests/OptimizationTests.swift | 5 +++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Sources/_StringProcessing/Optimizations/AutoPossessification.swift b/Sources/_StringProcessing/Optimizations/AutoPossessification.swift index 03993940f..7a728365c 100644 --- a/Sources/_StringProcessing/Optimizations/AutoPossessification.swift +++ b/Sources/_StringProcessing/Optimizations/AutoPossessification.swift @@ -24,7 +24,6 @@ extension DSLList { switch atom { case .changeMatchingOptions(let seq): // Exit early if an atom changes the matching options. - // TODO: Allow some/all options changes. if allowOptionsChanges { options.apply(seq.ast) return nil @@ -143,15 +142,17 @@ extension DSLList { let quantPosition = position position += 1 - // Do a search within this quantification's contents - // FIXME: How to handle an inner quantification surfacing here? - var innerPosition = position - _ = autoPossessifyNextQuantification(&innerPosition, options: &options) - - switch _requiredAtomImpl(&position, options: &options, allowOptionsChanges: false) { - case .some(let atom?): + // Limit auto-possessification to a single quantified atom, to avoid + // issues of overlapped matches. + guard position < nodes.count else { + return nil + } + switch nodes[position] { + case .atom(let atom) where atom.isMatchable: return (quantPosition, atom) - case .none, .some(.none): + default: + var innerPosition = position + _ = autoPossessifyNextQuantification(&innerPosition, options: &options) return nil } diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index 40bcd2011..0fff0ebb1 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -31,7 +31,7 @@ import Testing } @available(macOS 9999, *) - @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/(?:(?:a+b)+b)/#, #/\d+a/#, #/a+A/#]) + @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/\d+a/#, #/a+A/#]) func autoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree) list.autoPossessify() @@ -49,7 +49,8 @@ import Testing @available(macOS 9999, *) @Test(arguments: [ #/a?/#, #/a+a/#, #/a+(?:b|c)/#, #/(?:a+|b+)/#, #/[a]/#, #/a?a/#, - #/(?i)a+A/#, #/(?i:a+A)/# // case insensitivity when checking exclusion + #/(?i)a+A/#, #/(?i:a+A)/#, // case insensitivity when checking exclusion + #/(?:(?:ab)+b)/#, // single atom quantifications only ]) func noAutoPossessify(pattern: Regex) throws { var list = DSLList(tree: pattern.program.tree)