99//
1010//===----------------------------------------------------------------------===//
1111
12- // TODO: mock up multi-line soon
13-
14- enum Delimiter : Hashable , CaseIterable {
15- case traditional
16- case experimental
17- case reSingleQuote
18- case rxSingleQuote
19-
20- var openingAndClosing : ( opening: String , closing: String ) {
21- switch self {
22- case . traditional: return ( " #/ " , " /# " )
23- case . experimental: return ( " #| " , " |# " )
24- case . reSingleQuote: return ( " re' " , " ' " )
25- case . rxSingleQuote: return ( " rx' " , " ' " )
12+ struct Delimiter : Hashable {
13+ let kind : Kind
14+ let poundCount : Int
15+
16+ init ( _ kind: Kind , poundCount: Int ) {
17+ precondition ( kind. allowsExtendedPoundSyntax || poundCount == 0 )
18+ self . kind = kind
19+ self . poundCount = poundCount
20+ }
21+
22+ var opening : String {
23+ String ( repeating: " # " , count: poundCount) + kind. opening
24+ }
25+ var closing : String {
26+ kind. closing + String( repeating: " # " , count: poundCount)
27+ }
28+
29+ /// Whether or not multi-line mode is permitted.
30+ var allowsMultiline : Bool {
31+ switch kind {
32+ case . forwardSlash:
33+ return poundCount > 0
34+ case . experimental, . reSingleQuote, . rxSingleQuote:
35+ return false
2636 }
2737 }
28- var opening : String { openingAndClosing. opening }
29- var closing : String { openingAndClosing. closing }
30-
31- /// The default set of syntax options that the delimiter indicates.
32- var defaultSyntaxOptions : SyntaxOptions {
33- switch self {
34- case . traditional, . reSingleQuote:
35- return . traditional
36- case . experimental, . rxSingleQuote:
37- return . experimental
38+
39+ /// The delimiters which are currently enabled.
40+ static var enabledDelimiters : [ Kind ] { [ . forwardSlash] }
41+
42+ /// All known delimiters.
43+ static var allDelimiters : [ Kind ] { Kind . allCases }
44+ }
45+
46+ extension Delimiter {
47+ enum Kind : Hashable , CaseIterable {
48+ case forwardSlash
49+ case experimental
50+ case reSingleQuote
51+ case rxSingleQuote
52+
53+ var openingAndClosing : ( opening: String , closing: String ) {
54+ switch self {
55+ case . forwardSlash: return ( " / " , " / " )
56+ case . experimental: return ( " #| " , " |# " )
57+ case . reSingleQuote: return ( " re' " , " ' " )
58+ case . rxSingleQuote: return ( " rx' " , " ' " )
59+ }
60+ }
61+ var opening : String { openingAndClosing. opening }
62+ var closing : String { openingAndClosing. closing }
63+
64+ /// Whether or not extended pound syntax, e.g. `##/.../##`, is allowed with
65+ /// this delimiter.
66+ var allowsExtendedPoundSyntax : Bool {
67+ switch self {
68+ case . forwardSlash:
69+ return true
70+ case . experimental, . reSingleQuote, . rxSingleQuote:
71+ return false
72+ }
3873 }
3974 }
4075}
4176
4277struct DelimiterLexError : Error , CustomStringConvertible {
4378 enum Kind : Hashable {
44- case endOfString
79+ case unterminated
4580 case invalidUTF8 // TODO: better range reporting
4681 case unknownDelimiter
4782 case unprintableASCII
83+ case multilineClosingNotOnNewline
4884 }
4985
5086 var kind : Kind
@@ -59,10 +95,11 @@ struct DelimiterLexError: Error, CustomStringConvertible {
5995
6096 var description : String {
6197 switch kind {
62- case . endOfString : return " unterminated regex literal "
98+ case . unterminated : return " unterminated regex literal "
6399 case . invalidUTF8: return " invalid UTF-8 found in source file "
64100 case . unknownDelimiter: return " unknown regex literal delimiter "
65101 case . unprintableASCII: return " unprintable ASCII character found in source file "
102+ case . multilineClosingNotOnNewline: return " closing delimiter must appear on new line "
66103 }
67104 }
68105}
@@ -72,11 +109,18 @@ fileprivate struct DelimiterLexer {
72109 var cursor : UnsafeRawPointer
73110 let end : UnsafeRawPointer
74111
75- init ( start: UnsafeRawPointer , end: UnsafeRawPointer ) {
112+ var firstNewline : UnsafeRawPointer ?
113+ var isMultiline : Bool { firstNewline != nil }
114+
115+ let delimiters : [ Delimiter . Kind ]
116+
117+ init ( start: UnsafeRawPointer , end: UnsafeRawPointer ,
118+ delimiters: [ Delimiter . Kind ] ) {
76119 precondition ( start <= end)
77120 self . start = start
78121 self . cursor = start
79122 self . end = end
123+ self . delimiters = delimiters
80124 }
81125
82126 func ascii( _ s: Unicode . Scalar ) -> UInt8 {
@@ -120,25 +164,34 @@ fileprivate struct DelimiterLexer {
120164 precondition ( cursor <= end, " Cannot advance past end " )
121165 }
122166
123- /// Check to see if a UTF-8 sequence can be eaten from the current cursor.
124- func canEat( _ utf8 : String . UTF8View ) -> Bool {
125- guard let slice = slice ( utf8 . count) else { return false }
126- return slice. elementsEqual ( utf8 )
167+ /// Check to see if a byte sequence can be eaten from the current cursor.
168+ func canEat< C : Collection > ( _ bytes : C ) -> Bool where C . Element == UInt8 {
169+ guard let slice = slice ( bytes . count) else { return false }
170+ return slice. elementsEqual ( bytes )
127171 }
128172
129- /// Attempt to eat a UTF-8 byte sequence, returning `true` if successful.
130- mutating func tryEat( _ utf8: String . UTF8View ) -> Bool {
131- guard canEat ( utf8) else { return false }
132- advanceCursor ( utf8. count)
173+ /// Attempt to eat a byte sequence, returning `true` if successful.
174+ mutating func tryEat< C : Collection > (
175+ _ bytes: C
176+ ) -> Bool where C. Element == UInt8 {
177+ guard canEat ( bytes) else { return false }
178+ advanceCursor ( bytes. count)
179+ return true
180+ }
181+
182+ /// Attempt to eat an ASCII scalar, returning `true` if successful.
183+ mutating func tryEat( ascii s: Unicode . Scalar ) -> Bool {
184+ guard load ( ) == ascii ( s) else { return false }
185+ advanceCursor ( )
133186 return true
134187 }
135188
136189 /// Attempt to skip over a closing delimiter character that is unlikely to be
137190 /// the actual closing delimiter.
138191 mutating func trySkipDelimiter( _ delimiter: Delimiter ) {
139192 // Only the closing `'` for re'...'/rx'...' can potentially be skipped over.
140- switch delimiter {
141- case . traditional , . experimental:
193+ switch delimiter. kind {
194+ case . forwardSlash , . experimental:
142195 return
143196 case . reSingleQuote, . rxSingleQuote:
144197 break
@@ -222,12 +275,23 @@ fileprivate struct DelimiterLexer {
222275 let contentsEnd = cursor
223276 guard tryEat ( delimiter. closing. utf8) else { return nil }
224277
225- // Form a string from the contents and make sure it's valid UTF-8.
226278 let count = contentsEnd - contentsStart
227279 let contents = UnsafeRawBufferPointer (
228280 start: contentsStart, count: count)
229- let s = String ( decoding: contents, as: UTF8 . self)
230281
282+ // In multi-line mode, the closing delimiter must begin a new line, so scan
283+ // backwards and make sure only whitespace lies between it and the preceding newline.
284+ if isMultiline {
285+ let idx = contents. lastIndex (
286+ where: { $0 == ascii ( " \n " ) || $0 == ascii ( " \r " ) } ) ! + 1
287+ guard contents [ idx... ] . all ( { $0 == ascii ( " " ) || $0 == ascii ( " \t " ) } )
288+ else {
289+ throw DelimiterLexError ( . multilineClosingNotOnNewline, resumeAt: cursor)
290+ }
291+ }
292+
293+ // Form a string from the contents and make sure it's valid UTF-8.
294+ let s = String ( decoding: contents, as: UTF8 . self)
231295 guard s. utf8. elementsEqual ( contents) else {
232296 throw DelimiterLexError ( . invalidUTF8, resumeAt: cursor)
233297 }
@@ -238,7 +302,10 @@ fileprivate struct DelimiterLexer {
238302 /// the end of the buffer is reached.
239303 mutating func advance( escaped: Bool = false ) throws {
240304 guard let next = load ( ) else {
241- throw DelimiterLexError ( . endOfString, resumeAt: cursor)
305+ // We've hit the end of the buffer. In multi-line mode, we don't want to
306+ // skip over what is likely otherwise valid Swift code, so resume from the
307+ // first newline.
308+ throw DelimiterLexError ( . unterminated, resumeAt: firstNewline ?? cursor)
242309 }
243310 switch UnicodeScalar ( next) {
244311 case let next where !next. isASCII:
@@ -249,7 +316,10 @@ fileprivate struct DelimiterLexer {
249316 advanceCursor ( )
250317
251318 case " \n " , " \r " :
252- throw DelimiterLexError ( . endOfString, resumeAt: cursor)
319+ guard isMultiline else {
320+ throw DelimiterLexError ( . unterminated, resumeAt: cursor)
321+ }
322+ advanceCursor ( )
253323
254324 case " \0 " :
255325 // TODO: Warn to match the behavior of String literal lexer? Or should
@@ -261,8 +331,12 @@ fileprivate struct DelimiterLexer {
261331 advanceCursor ( )
262332 try advance ( escaped: true )
263333
264- case let next where !next. isPrintableASCII:
334+ case let next
335+ where !next. isPrintableASCII && !( isMultiline && next == " \t " ) :
265336 // Diagnose unprintable ASCII.
337+ // Note that tabs are allowed in multi-line literals.
338+ // TODO: This matches the string literal behavior, but should we allow
339+ // tabs for single-line regex literals too?
266340 // TODO: Ideally we would recover and continue to lex until the ending
267341 // delimiter.
268342 throw DelimiterLexError ( . unprintableASCII, resumeAt: cursor. successor ( ) )
@@ -272,17 +346,60 @@ fileprivate struct DelimiterLexer {
272346 }
273347 }
274348
349+ mutating func tryLexOpeningDelimiter( poundCount: Int ) -> Delimiter ? {
350+ for kind in delimiters {
351+ // If the delimiter allows extended pound syntax, or there are no pounds,
352+ // we just need to lex it.
353+ let opening = kind. opening. utf8
354+ if kind. allowsExtendedPoundSyntax || poundCount == 0 {
355+ guard tryEat ( opening) else { continue }
356+ return Delimiter ( kind, poundCount: poundCount)
357+ }
358+
359+ // The delimiter doesn't allow extended pound syntax, so the pounds must be
360+ // part of the delimiter.
361+ guard
362+ poundCount < opening. count,
363+ opening. prefix ( poundCount)
364+ . elementsEqual ( repeatElement ( ascii ( " # " ) , count: poundCount) ) ,
365+ tryEat ( opening. dropFirst ( poundCount) )
366+ else { continue }
367+
368+ return Delimiter ( kind, poundCount: 0 )
369+ }
370+ return nil
371+ }
372+
275373 /*consuming*/ mutating func lex(
276374 ) throws -> ( contents: String , Delimiter , end: UnsafeRawPointer ) {
375+ // We can consume any number of pound signs.
376+ var poundCount = 0
377+ while tryEat ( ascii: " # " ) {
378+ poundCount += 1
379+ }
277380
278381 // Try to lex the opening delimiter.
279- guard let delimiter = Delimiter . allCases. first (
280- where: { tryEat ( $0. opening. utf8) }
281- ) else {
382+ guard let delimiter = tryLexOpeningDelimiter ( poundCount: poundCount) else {
282383 throw DelimiterLexError ( . unknownDelimiter, resumeAt: cursor. successor ( ) )
283384 }
284-
285385 let contentsStart = cursor
386+
387+ // If the delimiter allows multi-line, try skipping over any whitespace to a
388+ // newline character. If we can do that, we enter multi-line mode.
389+ if delimiter. allowsMultiline {
390+ while let next = load ( ) {
391+ switch next {
392+ case ascii ( " " ) , ascii ( " \t " ) :
393+ advanceCursor ( )
394+ continue
395+ case ascii ( " \n " ) , ascii ( " \r " ) :
396+ firstNewline = cursor
397+ default :
398+ break
399+ }
400+ break
401+ }
402+ }
286403 while true {
287404 // Check to see if we're at a character that looks like a delimiter, but
288405 // likely isn't. In such a case, we can attempt to skip over it.
@@ -302,20 +419,34 @@ fileprivate struct DelimiterLexer {
302419/// Drop a set of regex delimiters from the input string, returning the contents
303420/// and the delimiter used. The input string must have valid delimiters.
304421func droppingRegexDelimiters( _ str: String ) -> ( String , Delimiter ) {
305- func stripDelimiter( _ delim: Delimiter ) -> String ? {
422+ func stripDelimiter( _ kind: Delimiter . Kind ) -> ( String , Delimiter ) ? {
423+ var slice = str. utf8 [ ... ]
424+
425+ // Try to strip any number of opening '#'s.
426+ var poundCount = 0
427+ if kind. allowsExtendedPoundSyntax {
428+ poundCount = slice. prefix ( while: {
429+ $0 == UInt8 ( ( " # " as UnicodeScalar ) . value)
430+ } ) . count
431+ slice = slice. dropFirst ( poundCount)
432+ }
433+
306434 // The opening delimiter must match.
307- guard var slice = str . utf8 . tryDropPrefix ( delim . opening. utf8)
435+ guard var slice = slice . tryDropPrefix ( kind . opening. utf8)
308436 else { return nil }
309437
310438 // The closing delimiter may optionally match, as it may not be present in
311439 // invalid code.
440+ let delim = Delimiter ( kind, poundCount: poundCount)
312441 if let newSlice = slice. tryDropSuffix ( delim. closing. utf8) {
313442 slice = newSlice
314443 }
315- return String ( slice)
444+ let result = String ( decoding: slice, as: UTF8 . self)
445+ precondition ( result. utf8. elementsEqual ( slice) )
446+ return ( result, delim)
316447 }
317- for d in Delimiter . allCases {
318- if let contents = stripDelimiter ( d ) {
448+ for kind in Delimiter . allDelimiters {
449+ if let ( contents, d ) = stripDelimiter ( kind ) {
319450 return ( contents, d)
320451 }
321452 }
@@ -325,8 +456,9 @@ func droppingRegexDelimiters(_ str: String) -> (String, Delimiter) {
325456/// Attempt to lex a regex literal between `start` and `end`, returning either
326457/// the contents and pointer from which to resume lexing, or an error.
327458func lexRegex(
328- start: UnsafeRawPointer , end: UnsafeRawPointer
459+ start: UnsafeRawPointer , end: UnsafeRawPointer ,
460+ delimiters: [ Delimiter . Kind ] = Delimiter . enabledDelimiters
329461) throws -> ( contents: String , Delimiter , end: UnsafeRawPointer ) {
330- var lexer = DelimiterLexer ( start: start, end: end)
462+ var lexer = DelimiterLexer ( start: start, end: end, delimiters : delimiters )
331463 return try lexer. lex ( )
332464}
0 commit comments