@@ -342,8 +342,8 @@ extension Source {
342342 } . value
343343 }
344344
345- /// Eat a scalar off the front, starting from after the
346- /// backslash and base character (e.g. `\u` or `\x`).
345+ /// Try to eat a scalar off the front, starting from after the backslash and
346+ /// base character (e.g. `\u` or `\x`).
347347 ///
348348 /// UniScalar -> 'u{' UniScalarSequence '}'
349349 /// | 'u' HexDigit{4}
@@ -353,60 +353,60 @@ extension Source {
353353 /// | 'o{' OctalDigit{1...} '}'
354354 /// | '0' OctalDigit{0...3}
355355 ///
356- mutating func expectUnicodeScalar(
357- escapedCharacter base: Character
358- ) throws -> AST . Atom . Kind {
356+ mutating func lexUnicodeScalar( ) throws -> AST . Atom . Kind ? {
359357 try recordLoc { src in
358+ try src. tryEating { src in
360359
361- func nullScalar( ) -> AST . Atom . Kind {
362- let pos = src. currentPosition
363- return . scalar( . init( UnicodeScalar ( 0 ) , SourceLocation ( pos ..< pos) ) )
364- }
365-
366- // TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
367- switch base {
368- // Hex numbers.
369- case " u " where src. tryEat ( " { " ) :
370- return try src. expectUnicodeScalarSequence ( eating: " } " )
371-
372- case " x " where src. tryEat ( " { " ) :
373- let str = try src. lexUntil ( eating: " } " )
374- return . scalar( try Source . validateUnicodeScalar ( str, . hex) )
375-
376- case " x " :
377- // \x expects *up to* 2 digits.
378- guard let digits = src. tryEatLocatedPrefix ( maxLength: 2 , \. isHexDigit)
379- else {
380- // In PCRE, \x without any valid hex digits is \u{0}.
381- // TODO: This doesn't appear to be followed by ICU or Oniguruma, so
382- // could be changed to throw an error if we had a parsing mode for
383- // them.
384- return nullScalar ( )
360+ func nullScalar( ) -> AST . Atom . Kind {
361+ let pos = src. currentPosition
362+ return . scalar( . init( UnicodeScalar ( 0 ) , SourceLocation ( pos ..< pos) ) )
385363 }
386- return . scalar( try Source . validateUnicodeScalar ( digits, . hex) )
387364
388- case " u " :
389- return . scalar( try src. expectUnicodeScalar ( numDigits: 4 ) )
390- case " U " :
391- return . scalar( try src. expectUnicodeScalar ( numDigits: 8 ) )
365+ // TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
366+ switch src. tryEat ( ) {
367+ // Hex numbers.
368+ case " u " where src. tryEat ( " { " ) :
369+ return try src. expectUnicodeScalarSequence ( eating: " } " )
370+
371+ case " x " where src. tryEat ( " { " ) :
372+ let str = try src. lexUntil ( eating: " } " )
373+ return . scalar( try Source . validateUnicodeScalar ( str, . hex) )
374+
375+ case " x " :
376+ // \x expects *up to* 2 digits.
377+ guard let digits = src. tryEatLocatedPrefix ( maxLength: 2 , \. isHexDigit)
378+ else {
379+ // In PCRE, \x without any valid hex digits is \u{0}.
380+ // TODO: This doesn't appear to be followed by ICU or Oniguruma, so
381+ // could be changed to throw an error if we had a parsing mode for
382+ // them.
383+ return nullScalar ( )
384+ }
385+ return . scalar( try Source . validateUnicodeScalar ( digits, . hex) )
386+
387+ case " u " :
388+ return . scalar( try src. expectUnicodeScalar ( numDigits: 4 ) )
389+ case " U " :
390+ return . scalar( try src. expectUnicodeScalar ( numDigits: 8 ) )
391+
392+ // Octal numbers.
393+ case " o " where src. tryEat ( " { " ) :
394+ let str = try src. lexUntil ( eating: " } " )
395+ return . scalar( try Source . validateUnicodeScalar ( str, . octal) )
396+
397+ case " 0 " :
398+ // We can read *up to* 3 more octal digits.
399+ // FIXME: PCRE can only read up to 2 octal digits, if we get a strict
400+ // PCRE mode, we should limit it here.
401+ guard let digits = src. tryEatLocatedPrefix ( maxLength: 3 , \. isOctalDigit)
402+ else {
403+ return nullScalar ( )
404+ }
405+ return . scalar( try Source . validateUnicodeScalar ( digits, . octal) )
392406
393- // Octal numbers.
394- case " o " where src. tryEat ( " { " ) :
395- let str = try src. lexUntil ( eating: " } " )
396- return . scalar( try Source . validateUnicodeScalar ( str, . octal) )
397-
398- case " 0 " :
399- // We can read *up to* 3 more octal digits.
400- // FIXME: PCRE can only read up to 2 octal digits, if we get a strict
401- // PCRE mode, we should limit it here.
402- guard let digits = src. tryEatLocatedPrefix ( maxLength: 3 , \. isOctalDigit)
403- else {
404- return nullScalar ( )
407+ default :
408+ return nil
405409 }
406- return . scalar( try Source . validateUnicodeScalar ( digits, . octal) )
407-
408- default :
409- fatalError ( " Unexpected scalar start " )
410410 }
411411 } . value
412412 }
@@ -1712,6 +1712,11 @@ extension Source {
17121712 return ref
17131713 }
17141714
1715+ // Hexadecimal and octal unicode scalars.
1716+ if let scalar = try src. lexUnicodeScalar ( ) {
1717+ return scalar
1718+ }
1719+
17151720 guard let char = src. tryEat ( ) else {
17161721 throw ParseError . expectedEscape
17171722 }
@@ -1723,14 +1728,6 @@ extension Source {
17231728 return . escaped( builtin)
17241729 }
17251730
1726- switch char {
1727- // Hexadecimal and octal unicode scalars.
1728- case " u " , " x " , " U " , " o " , " 0 " :
1729- return try src. expectUnicodeScalar ( escapedCharacter: char)
1730- default :
1731- break
1732- }
1733-
17341731 // We only allow unknown escape sequences for non-letter non-number ASCII,
17351732 // and non-ASCII whitespace.
17361733 // TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.
0 commit comments