@@ -74,6 +74,9 @@ fileprivate extension Compiler.ByteCodeGen {
7474 emitMatchScalar ( s)
7575 }
7676
77+ case let . characterClass( cc) :
78+ emitCharacterClass ( cc)
79+
7780 case let . assertion( kind) :
7881 try emitAssertion ( kind)
7982
@@ -148,147 +151,24 @@ fileprivate extension Compiler.ByteCodeGen {
148151 }
149152 }
150153
151- mutating func emitStartOfLine( ) {
152- builder. buildAssert { [ semanticLevel = options. semanticLevel]
153- ( _, _, input, pos, subjectBounds) in
154- if pos == subjectBounds. lowerBound { return true }
155- switch semanticLevel {
156- case . graphemeCluster:
157- return input [ input. index ( before: pos) ] . isNewline
158- case . unicodeScalar:
159- return input. unicodeScalars [ input. unicodeScalars. index ( before: pos) ] . isNewline
160- }
161- }
162- }
163-
164- mutating func emitEndOfLine( ) {
165- builder. buildAssert { [ semanticLevel = options. semanticLevel]
166- ( _, _, input, pos, subjectBounds) in
167- if pos == subjectBounds. upperBound { return true }
168- switch semanticLevel {
169- case . graphemeCluster:
170- return input [ pos] . isNewline
171- case . unicodeScalar:
172- return input. unicodeScalars [ pos] . isNewline
173- }
174- }
175- }
176-
177154 mutating func emitAssertion(
178155 _ kind: DSLTree . Atom . Assertion
179156 ) throws {
180- // FIXME: Depends on API model we have... We may want to
181- // think through some of these with API interactions in mind
182- //
183- // This might break how we use `bounds` for both slicing
184- // and things like `firstIndex`, that is `firstIndex` may
185- // need to supply both a slice bounds and a per-search bounds.
186- switch kind {
187- case . startOfSubject:
188- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
189- pos == subjectBounds. lowerBound
190- }
191-
192- case . endOfSubjectBeforeNewline:
193- builder. buildAssert { [ semanticLevel = options. semanticLevel]
194- ( _, _, input, pos, subjectBounds) in
195- if pos == subjectBounds. upperBound { return true }
196- switch semanticLevel {
197- case . graphemeCluster:
198- return input. index ( after: pos) == subjectBounds. upperBound
199- && input [ pos] . isNewline
200- case . unicodeScalar:
201- return input. unicodeScalars. index ( after: pos) == subjectBounds. upperBound
202- && input. unicodeScalars [ pos] . isNewline
203- }
204- }
205-
206- case . endOfSubject:
207- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
208- pos == subjectBounds. upperBound
209- }
210-
211- case . resetStartOfMatch:
212- // FIXME: Figure out how to communicate this out
157+ if kind == . resetStartOfMatch {
213158 throw Unsupported ( #"\K (reset/keep assertion)"# )
214-
215- case . firstMatchingPositionInSubject:
216- // TODO: We can probably build a nice model with API here
217-
218- // FIXME: This needs to be based on `searchBounds`,
219- // not the `subjectBounds` given as an argument here
220- builder. buildAssert { ( _, _, input, pos, subjectBounds) in false }
221-
222- case . textSegment:
223- builder. buildAssert { ( _, _, input, pos, _) in
224- // FIXME: Grapheme or word based on options
225- input. isOnGraphemeClusterBoundary ( pos)
226- }
227-
228- case . notTextSegment:
229- builder. buildAssert { ( _, _, input, pos, _) in
230- // FIXME: Grapheme or word based on options
231- !input. isOnGraphemeClusterBoundary ( pos)
232- }
233-
234- case . startOfLine:
235- emitStartOfLine ( )
236-
237- case . endOfLine:
238- emitEndOfLine ( )
239-
240- case . caretAnchor:
241- if options. anchorsMatchNewlines {
242- emitStartOfLine ( )
243- } else {
244- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
245- pos == subjectBounds. lowerBound
246- }
247- }
248-
249- case . dollarAnchor:
250- if options. anchorsMatchNewlines {
251- emitEndOfLine ( )
252- } else {
253- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
254- pos == subjectBounds. upperBound
255- }
256- }
257-
258- case . wordBoundary:
259- builder. buildAssert { [ options]
260- ( cache, maxIndex, input, pos, subjectBounds) in
261- if options. usesSimpleUnicodeBoundaries {
262- // TODO: How should we handle bounds?
263- return _CharacterClassModel. word. isBoundary (
264- input,
265- at: pos,
266- bounds: subjectBounds,
267- with: options
268- )
269- } else {
270- return input. isOnWordBoundary ( at: pos, using: & cache, & maxIndex)
271- }
272- }
273-
274- case . notWordBoundary:
275- builder. buildAssert { [ options]
276- ( cache, maxIndex, input, pos, subjectBounds) in
277- if options. usesSimpleUnicodeBoundaries {
278- // TODO: How should we handle bounds?
279- return !_CharacterClassModel. word. isBoundary (
280- input,
281- at: pos,
282- bounds: subjectBounds,
283- with: options
284- )
285- } else {
286- return !input. isOnWordBoundary ( at: pos, using: & cache, & maxIndex)
287- }
288- }
289159 }
160+ builder. buildAssert (
161+ by: kind,
162+ options. anchorsMatchNewlines,
163+ options. usesSimpleUnicodeBoundaries,
164+ options. usesASCIIWord,
165+ options. semanticLevel)
290166 }
291-
167+
168+ mutating func emitCharacterClass( _ cc: DSLTree . Atom . CharacterClass ) {
169+ builder. buildMatchBuiltin ( model: cc. asRuntimeModel ( options) )
170+ }
171+
292172 mutating func emitMatchScalar( _ s: UnicodeScalar ) {
293173 assert ( options. semanticLevel == . unicodeScalar)
294174 if options. isCaseInsensitive && s. properties. isCased {
@@ -907,10 +787,10 @@ fileprivate extension Compiler.ByteCodeGen {
907787 } else {
908788 builder. buildMatchAsciiBitset ( asciiBitset)
909789 }
910- } else {
911- let consumer = try ccc. generateConsumer ( options)
912- builder. buildConsume ( by: consumer)
790+ return
913791 }
792+ let consumer = try ccc. generateConsumer ( options)
793+ builder. buildConsume ( by: consumer)
914794 }
915795
916796 mutating func emitConcatenation( _ children: [ DSLTree . Node ] ) throws {
0 commit comments