1111
1212// TODO: mock up multi-line soon
1313
/// A regex literal delimiter: a delimiter kind together with the number of
/// `#` characters that surround it.
struct Delimiter: Hashable {
  /// The kind of delimiter, which supplies the base opening and closing text.
  let kind: Kind

  /// The number of `#` characters prepended to the opening delimiter and
  /// appended to the closing delimiter.
  let poundCount: Int

  /// Creates a delimiter of the given kind with `poundCount` surrounding `#`s.
  ///
  /// - Parameters:
  ///   - kind: The kind of delimiter.
  ///   - poundCount: The number of surrounding `#`s. Must be non-negative, and
  ///     must be `0` unless `kind.allowsExtendedPoundSyntax` is `true`.
  init(_ kind: Kind, poundCount: Int) {
    // `String(repeating:count:)` traps on a negative count, so reject it here
    // where the mistake is made rather than later in `opening`/`closing`.
    precondition(poundCount >= 0, "poundCount must be non-negative")
    precondition(
      kind.allowsExtendedPoundSyntax || poundCount == 0,
      "Delimiter kind does not allow extended pound syntax"
    )
    self.kind = kind
    self.poundCount = poundCount
  }

  /// The full opening delimiter: `poundCount` `#`s followed by the kind's
  /// opening text.
  var opening: String {
    String(repeating: "#", count: poundCount) + kind.opening
  }

  /// The full closing delimiter: the kind's closing text followed by
  /// `poundCount` `#`s.
  var closing: String {
    kind.closing + String(repeating: "#", count: poundCount)
  }

  /// The default set of syntax options that the delimiter indicates.
  var defaultSyntaxOptions: SyntaxOptions {
    switch kind {
    case .forwardSlash, .reSingleQuote:
      return .traditional
    case .experimental, .rxSingleQuote:
      return .experimental
    }
  }
}
4141
extension Delimiter {
  /// The kinds of regex literal delimiter, independent of any surrounding
  /// `#` characters.
  enum Kind: Hashable, CaseIterable {
    case forwardSlash
    case experimental
    case reSingleQuote
    case rxSingleQuote

    /// The base opening and closing text for this kind of delimiter.
    var openingAndClosing: (opening: String, closing: String) {
      switch self {
      case .forwardSlash:  return ("/", "/")
      case .experimental:  return ("#|", "|#")
      case .reSingleQuote: return ("re'", "'")
      case .rxSingleQuote: return ("rx'", "'")
      }
    }

    /// The base opening text for this kind of delimiter.
    var opening: String { openingAndClosing.opening }

    /// The base closing text for this kind of delimiter.
    var closing: String { openingAndClosing.closing }

    /// Whether or not extended pound syntax, e.g. `##/.../##`, is allowed with
    /// this delimiter.
    var allowsExtendedPoundSyntax: Bool {
      switch self {
      case .forwardSlash:
        return true
      case .experimental, .reSingleQuote, .rxSingleQuote:
        return false
      }
    }
  }
}
72+
4273struct DelimiterLexError : Error , CustomStringConvertible {
4374 enum Kind : Hashable {
4475 case unterminated
@@ -120,25 +151,34 @@ fileprivate struct DelimiterLexer {
120151 precondition ( cursor <= end, " Cannot advance past end " )
121152 }
122153
/// Check whether the given byte sequence appears at the current cursor,
/// without consuming it.
///
/// - Parameter bytes: The bytes to compare against the input.
/// - Returns: `true` if `slice(bytes.count)` yields a slice whose elements
///   equal `bytes`; `false` if no such slice is available or it differs.
func canEat<C: Collection>(_ bytes: C) -> Bool where C.Element == UInt8 {
  if let candidate = slice(bytes.count) {
    return candidate.elementsEqual(bytes)
  }
  return false
}
159+
/// Consume the given byte sequence if it appears at the current cursor.
///
/// - Parameter bytes: The bytes to match and consume.
/// - Returns: `true` if the bytes matched and the cursor was advanced past
///   them; `false` if they did not match, in which case the cursor is left
///   where it was.
mutating func tryEat<C: Collection>(
  _ bytes: C
) -> Bool where C.Element == UInt8 {
  if canEat(bytes) {
    advanceCursor(bytes.count)
    return true
  }
  return false
}
128168
/// Consume a single ASCII scalar if it is the value loaded at the current
/// cursor.
///
/// - Parameter s: The scalar to match.
/// - Returns: `true` if `load()` equals `ascii(s)` and the cursor was
///   advanced; `false` otherwise, leaving the cursor where it was.
mutating func tryEat(ascii s: Unicode.Scalar) -> Bool {
  if load() != ascii(s) {
    return false
  }
  advanceCursor()
  return true
}
135175
136176 /// Attempt to skip over a closing delimiter character that is unlikely to be
137177 /// the actual closing delimiter.
138178 mutating func trySkipDelimiter( _ delimiter: Delimiter ) {
139179 // Only the closing `'` for re'...'/rx'...' can potentially be skipped over.
140- switch delimiter {
141- case . traditional , . experimental:
180+ switch delimiter. kind {
181+ case . forwardSlash , . experimental:
142182 return
143183 case . reSingleQuote, . rxSingleQuote:
144184 break
@@ -272,16 +312,42 @@ fileprivate struct DelimiterLexer {
272312 }
273313 }
274314
/// Attempt to lex an opening delimiter at the current cursor, given that
/// `poundCount` `#` characters have already been consumed before it.
///
/// Kinds are tried in `Delimiter.Kind.allCases` order and the first match
/// wins. The cursor is only advanced when a delimiter is matched.
///
/// - Parameter poundCount: The number of `#`s already eaten by the caller.
/// - Returns: The matched delimiter, or `nil` if no kind matches. For kinds
///   that do not allow extended pound syntax the returned `poundCount` is `0`,
///   because the eaten `#`s were part of the delimiter's own opening text.
mutating func tryLexOpeningDelimiter(poundCount: Int) -> Delimiter? {
  let pound: UInt8 = ascii("#")
  for kind in Delimiter.Kind.allCases {
    let opening = kind.opening.utf8

    // If the delimiter allows extended pound syntax, or there are no pounds,
    // we just need to lex it.
    if kind.allowsExtendedPoundSyntax || poundCount == 0 {
      guard tryEat(opening) else { continue }
      return Delimiter(kind, poundCount: poundCount)
    }

    // The delimiter doesn't allow extended pound syntax, so the pounds must be
    // part of the delimiter itself (e.g. the leading `#` of `#|`). The
    // already-eaten pounds must exactly cover a proper prefix of the opening
    // text, and the remainder of the opening must follow at the cursor.
    guard
      poundCount < opening.count,
      opening.prefix(poundCount).allSatisfy({ $0 == pound }),
      tryEat(opening.dropFirst(poundCount))
    else { continue }

    return Delimiter(kind, poundCount: 0)
  }
  return nil
}
338+
275339 /*consuming*/ mutating func lex(
276340 ) throws -> ( contents: String , Delimiter , end: UnsafeRawPointer ) {
341+ // We can consume any number of pound signs.
342+ var poundCount = 0
343+ while tryEat ( ascii: " # " ) {
344+ poundCount += 1
345+ }
277346
278347 // Try to lex the opening delimiter.
279- guard let delimiter = Delimiter . allCases. first (
280- where: { tryEat ( $0. opening. utf8) }
281- ) else {
348+ guard let delimiter = tryLexOpeningDelimiter ( poundCount: poundCount) else {
282349 throw DelimiterLexError ( . unknownDelimiter, resumeAt: cursor. successor ( ) )
283350 }
284-
285351 let contentsStart = cursor
286352 while true {
287353 // Check to see if we're at a character that looks like a delimiter, but
@@ -302,20 +368,34 @@ fileprivate struct DelimiterLexer {
302368/// Drop a set of regex delimiters from the input string, returning the contents
303369/// and the delimiter used. The input string must have valid delimiters.
304370func droppingRegexDelimiters( _ str: String ) -> ( String , Delimiter ) {
/// Attempt to strip delimiters of the given kind from `str`.
///
/// - Parameter kind: The delimiter kind to try.
/// - Returns: The contents between the delimiters together with the
///   delimiter that was matched (including its pound count), or `nil` if
///   `str` does not start with this kind's opening delimiter.
func stripDelimiter(_ kind: Delimiter.Kind) -> (String, Delimiter)? {
  var remaining = str.utf8[...]

  // Strip any number of opening '#'s, but only for kinds that allow extended
  // pound syntax; for other kinds a leading '#' belongs to the opening text.
  var poundCount = 0
  if kind.allowsExtendedPoundSyntax {
    let pound = UInt8(ascii: "#")
    poundCount = remaining.prefix(while: { $0 == pound }).count
    remaining = remaining.dropFirst(poundCount)
  }

  // The opening delimiter must match.
  guard var contents = remaining.tryDropPrefix(kind.opening.utf8)
  else { return nil }

  // The closing delimiter may optionally match, as it may not be present in
  // invalid code.
  let delim = Delimiter(kind, poundCount: poundCount)
  if let withoutClosing = contents.tryDropSuffix(delim.closing.utf8) {
    contents = withoutClosing
  }

  // Decoding repairs invalid UTF-8, so check that the round trip preserved
  // the bytes exactly.
  let result = String(decoding: contents, as: UTF8.self)
  precondition(result.utf8.elementsEqual(contents))
  return (result, delim)
}
317- for d in Delimiter . allCases {
318- if let contents = stripDelimiter ( d ) {
397+ for kind in Delimiter . Kind . allCases {
398+ if let ( contents, d ) = stripDelimiter ( kind ) {
319399 return ( contents, d)
320400 }
321401 }
0 commit comments