@@ -493,7 +493,7 @@ struct CharacterByte: ExpressibleByUnicodeScalarLiteral, ExpressibleByIntegerLit
493493 let value : UInt8
494494
495495 init ( unicodeScalarLiteral value: Unicode . Scalar ) {
496- self . value = UInt8 ( ascii: Unicode . Scalar ( unicodeScalarLiteral : value) )
496+ self . value = UInt8 ( ascii: value)
497497 }
498498
499499 init ( integerLiteral value: UInt8 ) {
@@ -964,11 +964,11 @@ extension Lexer.Cursor {
964964 return Lexer . Result ( . endOfFile)
965965 default :
966966 var tmp = self
967- if tmp. advance ( if: { Unicode . Scalar ( $0 ) . isValidIdentifierStartCodePoint } ) {
967+ if tmp. advance ( if: { $0 . isValidIdentifierStartCodePoint } ) {
968968 return self . lexIdentifier ( )
969969 }
970970
971- if tmp. advance ( if: { Unicode . Scalar ( $0 ) . isOperatorStartCodePoint } ) {
971+ if tmp. advance ( if: { $0 . isOperatorStartCodePoint } ) {
972972 return self . lexOperatorIdentifier (
973973 sourceBufferStart: sourceBufferStart,
974974 preferRegexOverBinaryOperator: preferRegexOverBinaryOperator
@@ -1009,7 +1009,7 @@ extension Lexer.Cursor {
10091009 private mutating func lexAfterClosingStringQuote( ) -> Lexer . Result {
10101010 switch self . peek ( ) {
10111011 case " # " :
1012- self . advance ( while: { $0 == Unicode . Scalar ( " # " ) } )
1012+ self . advance ( while: { $0 == " # " } )
10131013 return Lexer . Result ( . rawStringPoundDelimiter, stateTransition: . pop)
10141014 case nil :
10151015 return Lexer . Result ( . endOfFile)
@@ -1028,7 +1028,7 @@ extension Lexer.Cursor {
10281028 /// number of '#' is correct because otherwise `isAtStringInterpolationAnchor`
10291029 /// would have returned false in `lexInStringLiteral` and we wouldn't have
10301030 /// transitioned to the `afterBackslashOfStringInterpolation` state.
1031- self . advance ( while: { $0 == Unicode . Scalar ( " # " ) } )
1031+ self . advance ( while: { $0 == " # " } )
10321032 return Lexer . Result ( . rawStringPoundDelimiter)
10331033 case " ( " :
10341034 _ = self . advance ( )
@@ -1248,9 +1248,7 @@ extension Lexer.Cursor {
12481248 )
12491249 }
12501250
1251- self . advance ( while: {
1252- ( $0 >= Unicode . Scalar ( " 0 " ) && $0 <= Unicode . Scalar ( " 7 " ) ) || $0 == Unicode . Scalar ( " _ " )
1253- } )
1251+ self . advance ( while: { ( $0 >= " 0 " && $0 <= " 7 " ) || $0 == " _ " } )
12541252
12551253 let tmp = self
12561254 if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1279,9 +1277,7 @@ extension Lexer.Cursor {
12791277 )
12801278 }
12811279
1282- self . advance ( while: {
1283- $0 == Unicode . Scalar ( " 0 " ) || $0 == Unicode . Scalar ( " 1 " ) || $0 == Unicode . Scalar ( " _ " )
1284- } )
1280+ self . advance ( while: { $0 == " 0 " || $0 == " 1 " || $0 == " _ " } )
12851281
12861282 let tmp = self
12871283 if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1298,7 +1294,7 @@ extension Lexer.Cursor {
12981294
12991295 // Handle a leading [0-9]+, lexing an integer or falling through if we have a
13001296 // floating point value.
1301- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1297+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
13021298
13031299 // TODO: This can probably be unified with lexHexNumber somehow
13041300
@@ -1333,7 +1329,7 @@ extension Lexer.Cursor {
13331329 // Lex decimal point.
13341330 if self . advance ( matching: " . " ) {
13351331 // Lex any digits after the decimal point.
1336- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1332+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
13371333 }
13381334
13391335 // Lex exponent.
@@ -1364,7 +1360,7 @@ extension Lexer.Cursor {
13641360 )
13651361 }
13661362
1367- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1363+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
13681364
13691365 let tmp = self
13701366 if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1401,7 +1397,7 @@ extension Lexer.Cursor {
14011397 }
14021398 }
14031399
1404- self . advance ( while: { $0. isHexDigit || $0 == Unicode . Scalar ( " _ " ) } )
1400+ self . advance ( while: { $0. isHexDigit || $0 == " _ " } )
14051401
14061402 if self . isAtEndOfFile || self . is ( notAt: " . " , " p " , " P " ) {
14071403 let tmp = self
@@ -1429,7 +1425,7 @@ extension Lexer.Cursor {
14291425 return Lexer . Result ( . integerLiteral)
14301426 }
14311427
1432- self . advance ( while: { $0. isHexDigit || $0 == Unicode . Scalar ( " _ " ) } )
1428+ self . advance ( while: { $0. isHexDigit || $0 == " _ " } )
14331429
14341430 if self . isAtEndOfFile || self . is ( notAt: " p " , " P " ) {
14351431 if let peeked = self . peek ( at: 1 ) , !Unicode. Scalar ( peeked) . isDigit {
@@ -1486,7 +1482,7 @@ extension Lexer.Cursor {
14861482 )
14871483 }
14881484
1489- self . advance ( while: { $0. isDigit || $0 == Unicode . Scalar ( " _ " ) } )
1485+ self . advance ( while: { $0. isDigit || $0 == " _ " } )
14901486
14911487 let tmp = self
14921488 if self . advance ( if: { $0. isValidIdentifierContinuationCodePoint } ) {
@@ -1545,8 +1541,8 @@ extension Lexer.Cursor {
15451541 case success( Unicode . Scalar )
15461542
15471543 /// An escaped character, e.g. `\n` or `\u{1234}`. It has been validated that
1548- /// this is a valid character
1549- case validatedEscapeSequence( Character )
1544+ /// this is a valid unicode scalar.
1545+ case validatedEscapeSequence( Unicode . Scalar )
15501546
15511547 /// The end of a string literal has been reached.
15521548 case endOfString
@@ -1605,16 +1601,11 @@ extension Lexer.Cursor {
16051601 case " \\ " : // Escapes.
16061602 _ = self . advance ( )
16071603 if !self . advanceIfStringDelimiter ( delimiterLength: delimiterLength) {
1608- return . success( Unicode . Scalar ( " \\ " ) )
1604+ return . success( " \\ " )
16091605 }
16101606 switch self . lexEscapedCharacter ( isMultilineString: stringLiteralKind == . multiLine) {
1611- case . success( let escapedCharacterCode) :
1612- // Check to see if the encoding is valid.
1613- if let validatedScalar = Unicode . Scalar ( escapedCharacterCode) {
1614- return . validatedEscapeSequence( Character ( validatedScalar) )
1615- } else {
1616- return . error( . invalidEscapeSequenceInStringLiteral)
1617- }
1607+ case . success( let codePoint) :
1608+ return . validatedEscapeSequence( codePoint)
16181609 case . error( let kind) :
16191610 return . error( kind)
16201611 }
@@ -1635,7 +1626,7 @@ extension Lexer.Cursor {
16351626 enum EscapedCharacterLex {
16361627 // Successfully lexed an escape sequence that represents the Unicode character
16371628 // at the given codepoint
1638- case success( UInt32 )
1629+ case success( Unicode . Scalar )
16391630 case error( TokenDiagnostic . Kind )
16401631 }
16411632
@@ -1649,13 +1640,13 @@ extension Lexer.Cursor {
16491640 // Escape processing. We already ate the "\".
16501641 switch self . peek ( ) {
16511642 // Simple single-character escapes.
1652- case " 0 " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \0 " ) ) )
1653- case " n " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \n " ) ) )
1654- case " r " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \r " ) ) )
1655- case " t " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \t " ) ) )
1656- case #"""# : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : #"""# ) ) )
1657- case " ' " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " ' " ) ) )
1658- case " \\ " : _ = self . advance ( ) ; return . success( UInt32 ( UInt8 ( ascii : " \\ " ) ) )
1643+ case " 0 " : _ = self . advance ( ) ; return . success( " \0 " )
1644+ case " n " : _ = self . advance ( ) ; return . success( " \n " )
1645+ case " r " : _ = self . advance ( ) ; return . success( " \r " )
1646+ case " t " : _ = self . advance ( ) ; return . success( " \t " )
1647+ case #"""# : _ = self . advance ( ) ; return . success( #"""# )
1648+ case " ' " : _ = self . advance ( ) ; return . success( " ' " )
1649+ case " \\ " : _ = self . advance ( ) ; return . success( " \\ " )
16591650
16601651 case " u " : // e.g. \u{1234}
16611652 _ = self . advance ( )
@@ -1667,7 +1658,7 @@ extension Lexer.Cursor {
16671658 return self . lexUnicodeEscape ( )
16681659 case " \n " , " \r " :
16691660 if isMultilineString && self . maybeConsumeNewlineEscape ( ) {
1670- return . success( UInt32 ( UInt8 ( ascii : " \n " ) ) )
1661+ return . success( " \n " )
16711662 }
16721663 return . error( . invalidEscapeSequenceInStringLiteral)
16731664 case nil :
@@ -1692,24 +1683,30 @@ extension Lexer.Cursor {
16921683 precondition ( quoteConsumed)
16931684
16941685 let digitStart = self
1695- var numDigits = 0
1696- while self . advance ( if: { $0. isHexDigit } ) {
1697- numDigits += 1
1698- }
1686+ self . advance ( while: { $0. isHexDigit } )
1687+
1688+ let digitText = SyntaxText (
1689+ baseAddress: digitStart. pointer,
1690+ count: digitStart. distance ( to: self )
1691+ )
16991692
17001693 guard self . advance ( matching: " } " ) else {
17011694 return . error( . expectedClosingBraceInUnicodeEscape)
17021695 }
17031696
1704- if numDigits == 0 || numDigits > 8 {
1697+ guard 1 <= digitText . count && digitText . count <= 8 else {
17051698 return . error( . invalidNumberOfHexDigitsInUnicodeEscape)
17061699 }
17071700
1708- if let codePoint = UInt32 ( String ( decoding: digitStart. input [ 0 ..< numDigits] , as: UTF8 . self) , radix: 16 ) {
1709- return . success( codePoint)
1710- } else {
1701+ guard
1702+ // FIXME: Implement 'UInt32(_: SyntaxText, radix:)'.
1703+ let codePoint = UInt32 ( String ( syntaxText: digitText) , radix: 16 ) ,
1704+ let scalar = Unicode . Scalar. init ( codePoint)
1705+ else {
17111706 return . error( . invalidEscapeSequenceInStringLiteral)
17121707 }
1708+
1709+ return . success( scalar)
17131710 }
17141711
17151712 private mutating func maybeConsumeNewlineEscape( ) -> Bool {
@@ -1719,7 +1716,7 @@ extension Lexer.Cursor {
17191716 case " " , " \t " :
17201717 continue
17211718 case " \r " :
1722- _ = tmp. advance ( if: { $0 == Unicode . Scalar ( " \n " ) } )
1719+ _ = tmp. advance ( if: { $0 == " \n " } )
17231720 fallthrough
17241721 case " \n " :
17251722 self = tmp
@@ -1776,7 +1773,7 @@ extension Lexer.Cursor {
17761773 // Scan ahead until the end of the line. Every time we see a closing
17771774 // quote, check if it is followed by the correct number of closing delimiters.
17781775 while isSingleLineString. is ( notAt: " \r " , " \n " ) {
1779- if isSingleLineString. advance ( if: { $0 == Unicode . Scalar ( ( #"""# ) ) } ) {
1776+ if isSingleLineString. advance ( if: { $0 == #"""# } ) {
17801777 if isSingleLineString. advanceIfStringDelimiter ( delimiterLength: leadingDelimiterLength) {
17811778 return Lexer . Result ( . stringQuote, stateTransition: stateTransitionAfterLexingStringQuote ( kind: . singleLine) )
17821779 }
@@ -2238,7 +2235,7 @@ extension Lexer.Cursor {
22382235 case . error:
22392236 // If the character was incorrectly encoded, give up.
22402237 return nil
2241- case . endOfString, . success( Unicode . Scalar ( 0x201D ) ) :
2238+ case . endOfString, . success( " \u{201D} " ) :
22422239 // If we found a closing quote, then we're done. Just return the spot
22432240 // to continue.
22442241 return body
@@ -2262,10 +2259,10 @@ extension Lexer.Cursor {
22622259 precondition ( !( self . peekScalar ( ) ? . isValidIdentifierStartCodePoint ?? false ) && !( self . peekScalar ( ) ? . isOperatorStartCodePoint ?? false ) )
22632260 let start = self
22642261 var tmp = self
2265- if tmp. advance ( if: { Unicode . Scalar ( $0 ) . isValidIdentifierContinuationCodePoint } ) {
2262+ if tmp. advance ( if: { $0 . isValidIdentifierContinuationCodePoint } ) {
22662263 // If this is a valid identifier continuation, but not a valid identifier
22672264 // start, attempt to recover by eating more continuation characters.
2268- tmp. advance ( while: { Unicode . Scalar ( $0 ) . isValidIdentifierContinuationCodePoint } )
2265+ tmp. advance ( while: { $0 . isValidIdentifierContinuationCodePoint } )
22692266 self = tmp
22702267 return . lexemeContents( Lexer . Result ( . identifier, error: LexingDiagnostic ( . invalidIdentifierStartCharacter, position: start) ) )
22712268 }
@@ -2369,10 +2366,8 @@ extension Lexer.Cursor {
23692366 previous: curPtr. input [ markerKind. introducer. utf8. count - 1 ]
23702367 )
23712368 while !restOfBuffer. isAtEndOfFile {
2372- let terminatorStart = markerKind. terminator. utf8. first!
2373- restOfBuffer. advance ( while: { byte in
2374- byte != Unicode . Scalar ( terminatorStart)
2375- } )
2369+ let terminatorStart = markerKind. terminator. unicodeScalars. first!
2370+ restOfBuffer. advance ( while: { byte in byte != terminatorStart } )
23762371
23772372 guard restOfBuffer. starts ( with: markerKind. terminator. utf8) else {
23782373 _ = restOfBuffer. advance ( )
0 commit comments