@@ -540,27 +540,84 @@ func readDigits(source: Source, start: Int, firstCode: UInt8) throws -> Int {
540540}
541541
/**
 * Reads a `.string` token from the source file.
 *
 * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
 *
 * Augmented to support block strings (`"""` ... `"""`): when a block string is found,
 * the token's value is the raw text normalized by `blockStringValue(rawValue:)`.
 */
func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) throws -> Token {
    // Lex the raw token first; the flag tells us whether it came from a `"""` block string.
    let raw = try readRawString(source: source, start: start, line: line, col: col, prev: prev)
    let rawToken = raw.token

    // Ordinary strings (and block strings without a value) are returned untouched.
    guard raw.isBlockString, let rawValue = rawToken.value else {
        return rawToken
    }

    // Block strings keep every token field except the value, which is replaced by the
    // indentation-stripped, blank-line-trimmed text.
    let formattedValue = blockStringValue(rawValue: rawValue)
    return Token(
        kind: rawToken.kind,
        start: rawToken.start,
        end: rawToken.end,
        line: rawToken.line,
        column: rawToken.column,
        value: formattedValue,
        prev: rawToken.prev,
        next: rawToken.next
    )
}
566+
/**
 * Reads a raw string token from the source.
 *
 * Does not clean up leading indentation or trailing whitespace on block string lines;
 * when the returned `isBlockString` flag is `true`, call `blockStringValue(rawValue:)`
 * with `token.value` to do that.
 *
 * - Returns: a tuple of the `.string` token and a `Bool` that is `true` when the token
 *   was read from a block string.
 */
574+ func readRawString( source: Source , start: Int , line: Int , col: Int , prev: Token ) throws -> ( token: Token , isBlockString: Bool ) {
548575 let body = source. body
549576 var positionIndex = body. utf8. index ( body. utf8. startIndex, offsetBy: start + 1 )
550577 var chunkStartIndex = positionIndex
551578 var currentCode : UInt8 ? = 0
552579 var value = " "
553-
580+ var blockString = false
581+
582+ // if we have minimum 5 more quotes worth of characters left after eating the first quote, check for block quote
583+ // body.utf8.index(positionIndex, offsetBy: 5) < body.utf8.endIndex
584+ if body. utf8. distance ( from: positionIndex, to: body. utf8. endIndex) >= 5 {
585+ if body. charCode ( at: positionIndex) == 34 ,
586+ body. charCode ( at: body. utf8. index ( after: positionIndex) ) == 34 {
587+ blockString = true
588+ positionIndex = body. utf8. index ( positionIndex, offsetBy: 2 )
589+ chunkStartIndex = positionIndex
590+ }
591+ }
592+
554593 while positionIndex < body. utf8. endIndex {
555594 currentCode = body. charCode ( at: positionIndex)
556595
557- // not LineTerminator not Quote (")
558- guard let code = currentCode, code != 0x000A && code != 0x000D && code != 34 else {
596+ // not in a block quote not LineTerminator not Quote (")
597+ guard let code = currentCode,
598+ blockString || ( code != 0x000A && code != 0x000D && code != 34 ) else {
599+ break
600+ }
601+
602+ // Exit if:
603+ // - we are parsing a block quote
604+ // - the current code is a Quote (")
605+ // - we have at least two more characters in the input
606+ // - and both remaining characters are Quotes (")
607+ if blockString,
608+ let code = currentCode,
609+ code == 34 ,
610+ body. utf8. index ( positionIndex, offsetBy: 2 ) < body. utf8. endIndex,
611+ let codeNext = body. charCode ( at: body. utf8. index ( after: positionIndex) ) ,
612+ codeNext == 34 ,
613+ let codeNextNext = body. charCode ( at: body. utf8. index ( after: body. utf8. index ( after: positionIndex) ) ) ,
614+ codeNextNext == 34 {
615+ positionIndex = body. utf8. index ( after: body. utf8. index ( after: positionIndex) ) // position after quotes
559616 break
560617 }
561618
562619 // SourceCharacter
563- if code < 0x0020 && code != 0x0009 {
620+ if code < 0x0020 && code != 0x0009 && ! ( blockString && ( code == 0x000A || code == 0x000D ) ) {
564621 throw syntaxError (
565622 source: source,
566623 position: body. offset ( of: positionIndex) ,
@@ -633,17 +690,121 @@ func readString(source: Source, start: Int, line: Int, col: Int, prev: Token) th
633690 )
634691 }
635692
636- value += String ( body. utf8 [ chunkStartIndex..< positionIndex] ) !
693+ if blockString {
694+ let valueRangeEnd = body. utf8. index ( positionIndex, offsetBy: - 2 )
695+ if chunkStartIndex < valueRangeEnd { // empty string?
696+ value += String ( body. utf8 [ chunkStartIndex ..< valueRangeEnd] ) !
697+ }
698+ } else {
699+ value += String ( body. utf8 [ chunkStartIndex ..< positionIndex] ) !
700+ }
701+
702+ return ( token: Token ( kind: . string,
703+ start: start,
704+ end: body. offset ( of: positionIndex) + 1 ,
705+ line: line,
706+ column: col,
707+ value: value,
708+ prev: prev) ,
709+ isBlockString: blockString)
710+ }
637711
638- return Token (
639- kind: . string,
640- start: start,
641- end: body. offset ( of: positionIndex) + 1 ,
642- line: line,
643- column: col,
644- value: value,
645- prev: prev
646- )
/**
 * blockStringValue(rawValue: String)
 *
 * Transcription of the algorithm specified in the [spec](http://spec.graphql.org/draft/#BlockStringValue())
 *
 * 1. Let `lines` be the result of splitting `rawValue` by *LineTerminator*.
 * 2. Let `commonIndent` be **null**.
 * 3. For each `line` in `lines`:
 *    a. If `line` is the first item in `lines`, continue to the next line.
 *    b. Let `length` be the number of characters in `line`.
 *    c. Let `indent` be the number of leading consecutive *WhiteSpace* characters in `line`.
 *    d. If `indent` is less than `length`:
 *       i. If `commonIndent` is null or `indent` is less than `commonIndent`:
 *          1. Let `commonIndent` be `indent`.
 * 4. If `commonIndent` is not null:
 *    a. For each `line` in `lines`:
 *       i. If `line` is the first item in `lines`, continue to the next line.
 *       ii. Remove `commonIndent` characters from the beginning of `line`.
 * 5. While the first item `line` in `lines` contains only *WhiteSpace*:
 *    a. Remove the first item from `lines`.
 * 6. While the last item `line` in `lines` contains only *WhiteSpace*:
 *    a. Remove the last item from `lines`.
 * 7. Let `formatted` be the empty character sequence.
 * 8. For each `line` in `lines`:
 *    a. If `line` is the first item in `lines`:
 *       i. Append `formatted` with `line`.
 *    b. Otherwise:
 *       i. Append `formatted` with a line feed character (U+000A).
 *       ii. Append `formatted` with `line`.
 * 9. Return `formatted`.
 *
 * Implementation notes:
 * - *LineTerminator* is LF, CR, or CR immediately followed by LF; a CRLF pair counts as
 *   one terminator and does not produce an extra empty line.
 * - *WhiteSpace* is tab (U+0009) or space (U+0020).
 * - `commonIndent` is an optional, so a genuine indent of zero is distinguishable from
 *   "no indent found yet" and is never overwritten by a larger indent.
 *
 * - Parameter rawValue: The text between the opening and closing `"""` of a block string.
 * - Returns: The formatted value; the empty string when every line is blank.
 */
func blockStringValue(rawValue: String) -> String {
    let isWhiteSpace: (UInt8) -> Bool = { $0 == 0x0009 || $0 == 0x0020 }
    let isBlank: (String.UTF8View.SubSequence) -> Bool = { $0.allSatisfy(isWhiteSpace) }

    // 1. Split by LineTerminator, keeping empty lines and treating CRLF as one terminator.
    let codeUnits = rawValue.utf8
    var lines: [String.UTF8View.SubSequence] = []
    var lineStart = codeUnits.startIndex
    var cursor = codeUnits.startIndex
    while cursor < codeUnits.endIndex {
        let code = codeUnits[cursor]
        let next = codeUnits.index(after: cursor)
        if code == 0x000A {
            lines.append(codeUnits[lineStart..<cursor])
            cursor = next
            lineStart = cursor
        } else if code == 0x000D {
            lines.append(codeUnits[lineStart..<cursor])
            // Swallow the LF of a CRLF pair so it does not start a spurious empty line.
            if next < codeUnits.endIndex, codeUnits[next] == 0x000A {
                cursor = codeUnits.index(after: next)
            } else {
                cursor = next
            }
            lineStart = cursor
        } else {
            cursor = next
        }
    }
    lines.append(codeUnits[lineStart..<codeUnits.endIndex])

    // 2-3. Smallest indent among the non-blank lines after the first; nil means "none found".
    var commonIndent: Int?
    for line in lines.dropFirst() {
        guard let firstContent = line.firstIndex(where: { !isWhiteSpace($0) }) else {
            continue // blank line: does not take part in the indent computation
        }
        let indent = line.distance(from: line.startIndex, to: firstContent)
        commonIndent = min(commonIndent ?? indent, indent)
    }

    // 4. Strip the common indent from every line except the first.
    if let commonIndent = commonIndent, commonIndent > 0 {
        for idx in lines.indices.dropFirst() {
            lines[idx] = lines[idx].dropFirst(commonIndent)
        }
    }

    // 5-6. Drop leading and trailing blank lines; nothing is left if every line is blank.
    guard let firstKept = lines.firstIndex(where: { !isBlank($0) }),
          let lastKept = lines.lastIndex(where: { !isBlank($0) }) else {
        return ""
    }

    // 7-9. Join the surviving lines with a single line feed.
    return lines[firstKept...lastKept]
        .map { String(Substring($0)) }
        .joined(separator: "\u{000A}")
}
648809
649810/**
0 commit comments