@@ -149,6 +149,14 @@ extension Source {
149149 return result
150150 }
151151
/// Run `body` against a temporary copy of this source and return its result.
///
/// The copy is handed to `body` as `inout`, so whatever `body` does to it is
/// not reflected in `self` once the lookahead returns. Any error thrown by
/// `body` is rethrown to the caller.
func lookahead<T>(_ body: (inout Source) throws -> T) rethrows -> T {
  var scratch = self
  let outcome = try body(&scratch)
  return outcome
}
159+
152160 /// Attempt to eat the given character, returning its source location if
153161 /// successful, `nil` otherwise.
154162 mutating func tryEatWithLoc( _ c: Character ) -> SourceLocation ? {
@@ -413,9 +421,7 @@ extension Source {
413421 ) throws -> ( Located < Quant . Amount > , Located < Quant . Kind > , [ AST . Trivia ] ) ? {
414422 var trivia : [ AST . Trivia ] = [ ]
415423
416- if let t = try lexNonSemanticWhitespace ( context: context) {
417- trivia. append ( t)
418- }
424+ if let t = lexNonSemanticWhitespace ( context: context) { trivia. append ( t) }
419425
420426 let amt : Located < Quant . Amount > ? = try recordLoc { src in
421427 if src. tryEat ( " * " ) { return . zeroOrMore }
@@ -424,7 +430,7 @@ extension Source {
424430
425431 return try src. tryEating { src in
426432 guard src. tryEat ( " { " ) ,
427- let range = try src. lexRange ( context: context) ,
433+ let range = try src. lexRange ( context: context, trivia : & trivia ) ,
428434 src. tryEat ( " } " )
429435 else { return nil }
430436 return range. value
@@ -433,9 +439,7 @@ extension Source {
433439 guard let amt = amt else { return nil }
434440
435441 // PCRE allows non-semantic whitespace here in extended syntax mode.
436- if let t = try lexNonSemanticWhitespace ( context: context) {
437- trivia. append ( t)
438- }
442+ if let t = lexNonSemanticWhitespace ( context: context) { trivia. append ( t) }
439443
440444 let kind : Located < Quant . Kind > = recordLoc { src in
441445 if src. tryEat ( " ? " ) { return . reluctant }
@@ -452,11 +456,17 @@ extension Source {
452456 /// | ExpRange
453457 /// ExpRange -> '..<' <Int> | '...' <Int>
454458 /// | <Int> '..<' <Int> | <Int> '...' <Int>?
455- mutating func lexRange( context: ParsingContext ) throws -> Located < Quant . Amount > ? {
459+ mutating func lexRange(
460+ context: ParsingContext , trivia: inout [ AST . Trivia ]
461+ ) throws -> Located < Quant . Amount > ? {
456462 try recordLoc { src in
457463 try src. tryEating { src in
464+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
465+
458466 let lowerOpt = try src. lexNumber ( )
459467
468+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
469+
460470 // ',' or '...' or '..<' or nothing
461471 // TODO: We ought to try and consume whitespace here and emit a
462472 // diagnostic for the user warning them that it would cause the range to
@@ -476,11 +486,15 @@ extension Source {
476486 closedRange = nil
477487 }
478488
489+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
490+
479491 let upperOpt = try src. lexNumber ( ) ? . map { upper in
480492 // If we have an open range, the upper bound should be adjusted down.
481493 closedRange == true ? upper : upper - 1
482494 }
483495
496+ if let t = src. lexWhitespace ( ) { trivia. append ( t) }
497+
484498 switch ( lowerOpt, closedRange, upperOpt) {
485499 case let ( l? , nil , nil ) :
486500 return . exactly( l)
@@ -625,11 +639,11 @@ extension Source {
625639 ///
626640 mutating func lexComment( context: ParsingContext ) throws -> AST . Trivia ? {
627641 let trivia : Located < String > ? = try recordLoc { src in
628- if src. tryEat ( sequence: " (?# " ) {
629- return try src. expectQuoted ( endingWith : " ) " ) . value
642+ if !context . isInCustomCharacterClass && src. tryEat ( sequence: " (?# " ) {
643+ return try src. lexUntil ( eating : " ) " ) . value
630644 }
631645 if context. experimentalComments, src. tryEat ( sequence: " /*") {
632- return try src.expectQuoted(endingWith : "*/" ) . value
646+ return try src.lexUntil(eating : "*/" ) . value
633647 }
634648 if context. endOfLineComments, src. tryEat ( " # " ) {
635649 // Try eat until we either exhaust the input, or hit a newline. Note
@@ -667,7 +681,7 @@ extension Source {
667681 /// Does nothing unless `SyntaxOptions.nonSemanticWhitespace` is set
668682 mutating func lexNonSemanticWhitespace(
669683 context: ParsingContext
670- ) throws -> AST . Trivia ? {
684+ ) -> AST . Trivia ? {
671685 guard context. ignoreWhitespace else { return nil }
672686
673687 // FIXME: PCRE only treats space and tab characters as whitespace when
@@ -699,7 +713,7 @@ extension Source {
699713 if let comment = try lexComment ( context: context) {
700714 return comment
701715 }
702- if let whitespace = try lexNonSemanticWhitespace ( context: context) {
716+ if let whitespace = lexNonSemanticWhitespace ( context: context) {
703717 return whitespace
704718 }
705719 return nil
@@ -1178,8 +1192,7 @@ extension Source {
11781192 }
11791193 }
11801194
1181- mutating func lexCustomCCStart(
1182- ) throws -> Located < CustomCC . Start > ? {
1195+ mutating func lexCustomCCStart( ) -> Located < CustomCC . Start > ? {
11831196 recordLoc { src in
11841197 // Make sure we don't have a POSIX character property. This may require
11851198 // walking to its ending to make sure we have a closing ':]', as otherwise
@@ -1240,8 +1253,9 @@ extension Source {
12401253
12411254 private func canLexPOSIXCharacterProperty( ) -> Bool {
12421255 do {
1243- var src = self
1244- return try src. lexPOSIXCharacterProperty ( ) != nil
1256+ return try lookahead { src in
1257+ try src. lexPOSIXCharacterProperty ( ) != nil
1258+ }
12451259 } catch {
12461260 // We want to tend on the side of lexing a POSIX character property, so
12471261 // even if it is invalid in some way (e.g invalid property names), still
@@ -1394,10 +1408,11 @@ extension Source {
13941408
/// Whether a numbered reference could be lexed at the current position.
/// Nothing is consumed from `self`; the check runs inside a lookahead.
private func canLexNumberedReference() -> Bool {
  lookahead { probe in
    // Step over a leading '+' or '-' if there is one; the result is unused.
    _ = probe.tryEat(anyOf: "+", "-")

    // The next character, if any, must pass the decimal radix's filter.
    if let candidate = probe.peek() {
      return RadixKind.decimal.characterFilter(candidate)
    }
    return false
  }
}
14021417
14031418 /// Eat a named reference up to a given closing delimiter.
@@ -1587,53 +1602,55 @@ extension Source {
15871602
/// Whether a group-like reference can be lexed following the '(?' specifier.
/// Nothing is consumed from `self`; the check runs inside a lookahead.
private func canLexGroupLikeReference() -> Bool {
  lookahead { probe in
    // 'P' only counts when it is immediately followed by '=' or '>'.
    if probe.tryEat("P") {
      let follower = probe.tryEat(anyOf: "=", ">")
      return follower != nil
    }
    // '&' or 'R' qualifies on its own; otherwise defer to the
    // numbered-reference check.
    let hasDirectMarker = probe.tryEat(anyOf: "&", "R") != nil
    return hasDirectMarker || probe.canLexNumberedReference()
  }
}
15991615
/// Whether a matching option sequence terminated by ')' can be lexed here.
/// Such a sequence is an atom. Nothing is consumed from `self`; the check
/// runs inside a lookahead.
private func canLexMatchingOptionsAsAtom(context: ParsingContext) -> Bool {
  lookahead { probe in
    // A thrown error means the sequence contains invalid elements. Treat that
    // like "no sequence" so the caller can lex a group instead and diagnose
    // the invalid group kind.
    let options = try? probe.lexMatchingOptionSequence(context: context)
    if options == nil { return false }

    // Only a sequence closed by ')' is an atom.
    return probe.tryEat(")")
  }
}
16121628
/// Whether the upcoming group specifier should be lexed as an atom rather
/// than as a group. Nothing is consumed from `self`; the check runs inside a
/// lookahead.
private func shouldLexGroupLikeAtom(context: ParsingContext) -> Bool {
  lookahead { probe in
    guard probe.tryEat("(") else { return false }

    if probe.tryEat("?") {
      // After '(?', checked in this order (each only if the previous failed):
      //  - the start of a reference: '(?P=', '(?R', ...
      //  - 'C', the start of a PCRE callout
      //  - '{', the start of an Oniguruma 'of-contents' callout
      //  - a matching option atom: (?x), (?i), ...
      return probe.canLexGroupLikeReference()
        || probe.tryEat("C")
        || probe.tryEat("{")
        || probe.canLexMatchingOptionsAsAtom(context: context)
    }

    // '(*' is the start of a backreference directive or Oniguruma named
    // callout.
    return probe.tryEat("*")
  }
}
16381655
16391656 /// Consume an escaped atom, starting from after the backslash
@@ -2022,20 +2039,11 @@ extension Source {
20222039 return AST . Atom ( kind. value, kind. location)
20232040 }
20242041
/// Try to lex the '-' range operator of a custom character class.
///
/// Returns the source location of the consumed '-', or `nil` when a binary
/// operator such as '--' is next or the '-' could not be eaten.
mutating func lexCustomCharacterClassRangeOperator() -> SourceLocation? {
  // A pending binary operator (e.g. '--') takes precedence over a range '-'.
  let binaryOperatorAhead = peekCCBinOp() != nil
  return binaryOperatorAhead ? nil : tryEatWithLoc("-")
}
20402048
20412049 /// Try to consume a newline sequence matching option kind.
0 commit comments