@@ -341,6 +341,15 @@ func readToken(lexer: Lexer, prev: Token) throws -> Token {
341341 )
342342 // "
343343 case 34 :
344+ if body. charCode ( at: position + 1 ) == 34 &&
345+ body. charCode ( at: position + 2 ) == 34 {
346+ return try readBlockString ( lexer: lexer,
347+ source: source,
348+ start: position,
349+ line: line,
350+ col: col,
351+ prev: prev)
352+ }
344353 return try readString (
345354 source: source,
346355 start: position,
@@ -351,7 +360,7 @@ func readToken(lexer: Lexer, prev: Token) throws -> Token {
351360 default :
352361 break
353362 }
354-
363+
355364 throw syntaxError (
356365 source: source,
357366 position: position,
@@ -540,84 +549,27 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int {
540549}
541550
542551/**
543- * Reads a `. string` token from the source file.
552+ * Reads a string token from the source file.
544553 *
545554 * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
546- *
547- * augmented to support blockstrings """ """ and return `.blockString` token if found.
548555 */
549556func readString( source: Source , start: Int , line: Int , col: Int , prev: Token ) throws -> Token {
550- let ( token, isBlockString) = try readRawString ( source: source, start: start, line: line, col: col, prev: prev)
551-
552- if isBlockString,
553- let rawString = token. value {
554- let valueString = blockStringValue ( rawValue: rawString)
555- return Token ( kind: token. kind,
556- start: token. start,
557- end: token. end,
558- line: token. line,
559- column: token. column,
560- value: valueString,
561- prev: token. prev,
562- next: token. next)
563- }
564- return token
565- }
566-
567- /** Reads a raw string token from the source.
568- *
569- * Doesn't do any clean up of leading indentations or trailing whitespace for blockstring lines;
570- * so if `token.kind` == `.blockString`, call `blockStringValue` with `token.value` for that.
571- *
572- * returns: tuple of Token of kind `.string and Bool of true if it was a block string or not
573- */
574- func readRawString( source: Source , start: Int , line: Int , col: Int , prev: Token ) throws -> ( token: Token , isBlockString: Bool ) {
575557 let body = source. body
576558 var positionIndex = body. utf8. index ( body. utf8. startIndex, offsetBy: start + 1 )
577559 var chunkStartIndex = positionIndex
578560 var currentCode : UInt8 ? = 0
579561 var value = " "
580- var blockString = false
581-
582- // if we have minimum 5 more quotes worth of characters left after eating the first quote, check for block quote
583- // body.utf8.index(positionIndex, offsetBy: 5) < body.utf8.endIndex
584- if body. utf8. distance ( from: positionIndex, to: body. utf8. endIndex) >= 5 {
585- if body. charCode ( at: positionIndex) == 34 ,
586- body. charCode ( at: body. utf8. index ( after: positionIndex) ) == 34 {
587- blockString = true
588- positionIndex = body. utf8. index ( positionIndex, offsetBy: 2 )
589- chunkStartIndex = positionIndex
590- }
591- }
592-
562+
593563 while positionIndex < body. utf8. endIndex {
594564 currentCode = body. charCode ( at: positionIndex)
595565
596- // not in a block quote not LineTerminator not Quote (")
597- guard let code = currentCode,
598- blockString || ( code != 0x000A && code != 0x000D && code != 34 ) else {
599- break
600- }
601-
602- // Exit if:
603- // - we are parsing a block quote
604- // - the current code is a Quote (")
605- // - we have at least two more characters in the input
606- // - and both remaining characters are Quotes (")
607- if blockString,
608- let code = currentCode,
609- code == 34 ,
610- body. utf8. index ( positionIndex, offsetBy: 2 ) < body. utf8. endIndex,
611- let codeNext = body. charCode ( at: body. utf8. index ( after: positionIndex) ) ,
612- codeNext == 34 ,
613- let codeNextNext = body. charCode ( at: body. utf8. index ( after: body. utf8. index ( after: positionIndex) ) ) ,
614- codeNextNext == 34 {
615- positionIndex = body. utf8. index ( after: body. utf8. index ( after: positionIndex) ) // position after quotes
566+ // not LineTerminator not Quote (")
567+ guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else {
616568 break
617569 }
618570
619571 // SourceCharacter
620- if code < 0x0020 && code != 0x0009 && ! ( blockString && ( code == 0x000A || code == 0x000D ) ) {
572+ if code < 0x0020 && code != 0x0009 {
621573 throw syntaxError (
622574 source: source,
623575 position: body. offset ( of: positionIndex) ,
@@ -690,23 +642,93 @@ func readRawString(source: Source, start: Int, line: Int, col: Int, prev: Token)
690642 )
691643 }
692644
693- if blockString {
694- let valueRangeEnd = body. utf8. index ( positionIndex, offsetBy: - 2 )
695- if chunkStartIndex < valueRangeEnd { // empty string?
696- value += String ( body. utf8 [ chunkStartIndex ..< valueRangeEnd] ) !
645+ value += String ( body. utf8 [ chunkStartIndex..< positionIndex] ) !
646+
647+ return Token (
648+ kind: . string,
649+ start: start,
650+ end: body. offset ( of: positionIndex) + 1 ,
651+ line: line,
652+ column: col,
653+ value: value,
654+ prev: prev
655+ )
656+ }
657+
/**
 * Reads a block string token from the source file.
 *
 * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
 *
 * `start` is the offset of the first of the three opening quotes. Scanning
 * runs until an unescaped `"""` is found; the text collected in between
 * (with every `\"""` replaced by `"""`) is passed through
 * `blockStringValue(rawValue:)` and becomes the token's value.
 *
 * Side effect: `lexer.line` and `lexer.lineStart` are advanced for every
 * line terminator (LF, CR or CRLF) consumed inside the block string.
 *
 * - Throws: a syntax error when a control character other than TAB, LF or CR
 *   is encountered, or when the input ends before the closing `"""`.
 */
func readBlockString(lexer: Lexer, source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token {
    let body = source.body
    let utf8 = body.utf8
    let endIndex = utf8.endIndex
    var positionIndex = utf8.index(utf8.startIndex, offsetBy: start + 3)
    var chunkStartIndex = positionIndex
    var rawValue = ""

    // True when three consecutive quote bytes (") begin at `index`.
    // Bounds-safe: returns false when fewer than three bytes remain.
    func isTripleQuote(at index: String.Index) -> Bool {
        guard let third = utf8.index(index, offsetBy: 2, limitedBy: endIndex), third < endIndex else {
            return false
        }
        return utf8[index] == 34 && utf8[utf8.index(after: index)] == 34 && utf8[third] == 34
    }

    while positionIndex < endIndex {
        let code = utf8[positionIndex]

        // Closing triple quote (""")
        if code == 34, isTripleQuote(at: positionIndex) {
            rawValue += String(decoding: utf8[chunkStartIndex..<positionIndex], as: UTF8.self)
            return Token(
                kind: .blockstring,
                start: start,
                end: body.offset(of: positionIndex) + 3,
                line: line,
                column: col,
                value: blockStringValue(rawValue: rawValue),
                prev: prev
            )
        }

        // SourceCharacter: TAB, LF and CR are the only control characters allowed
        if code < 0x0020 &&
            code != 0x0009 &&
            code != 0x000A &&
            code != 0x000D {
            throw syntaxError(
                source: source,
                position: body.offset(of: positionIndex),
                description: "Invalid character within BlockString: \(character(code))."
            )
        }

        if code == 0x000A {
            // new line
            positionIndex = utf8.index(after: positionIndex)
            lexer.line += 1
            lexer.lineStart = body.offset(of: positionIndex)
        } else if code == 0x000D {
            // carriage return; a directly following LF belongs to the same line break
            let nextIndex = utf8.index(after: positionIndex)
            if nextIndex < endIndex, utf8[nextIndex] == 0x000A {
                positionIndex = utf8.index(after: nextIndex)
            } else {
                positionIndex = nextIndex
            }
            lexer.line += 1
            lexer.lineStart = body.offset(of: positionIndex)
        } else if code == 92, isTripleQuote(at: utf8.index(after: positionIndex)) {
            // escaped triple quote (\"""): four bytes in total, so it must be
            // recognised even when those are the last four bytes of the input;
            // otherwise its quotes would be mistaken for the closing delimiter.
            rawValue += String(decoding: utf8[chunkStartIndex..<positionIndex], as: UTF8.self) + "\"\"\""
            positionIndex = utf8.index(positionIndex, offsetBy: 4)
            chunkStartIndex = positionIndex
        } else {
            positionIndex = utf8.index(after: positionIndex)
        }
    }

    throw syntaxError(source: source, position: body.offset(of: positionIndex), description: "Unterminated blockstring")
}
711733
712734/**
0 commit comments