@@ -779,3 +779,142 @@ public let utf16Tests = [
779779 ] ,
780780]
781781
extension String {
  /// Prints a full list of the indices in every view of this string
  /// (characters, Unicode scalars, UTF-8 and UTF-16), each followed by the
  /// element found at that index.
  ///
  /// This is useful while debugging string indexing issues. All output is
  /// written with `print`; nothing is returned.
  public func dumpIndices() {
    // Renders `value` in uppercase hexadecimal, left-padded with "0" to at
    // least `width` digits. Values that need more digits are not truncated.
    func paddedHex<T: BinaryInteger>(_ value: T, width: Int) -> String {
      let digits = String(value, radix: 16, uppercase: true)
      let missing = Swift.max(0, width - digits.count)
      return String(repeating: "0", count: missing) + digits
    }

    print(" ------------------------------------------------------------------- ")
    print(" String: \(String(reflecting: self)) ")

    print(" Characters: ")
    for i in indices {
      print(" \(i) -> \(String(reflecting: self[i])) ")
    }

    print(" Scalars: ")
    for i in unicodeScalars.indices {
      let scalar = unicodeScalars[i]
      // Scalars without a Unicode name fall back to their debug description.
      let name = scalar.properties.name ?? " \(scalar.debugDescription) "
      // The padding and the digits are emitted contiguously so the code point
      // reads as a single token, e.g. `U+0041`.
      print(" \(i) -> U+\(paddedHex(scalar.value, width: 4)) \(name) ")
    }

    print(" UTF-8: ")
    for i in utf8.indices {
      print(" \(i) -> \(paddedHex(utf8[i], width: 2)) ")
    }

    print(" UTF-16: ")
    for i in utf16.indices {
      print(" \(i) -> \(paddedHex(utf16[i], width: 4)) ")
    }
  }

  /// Returns a list of every valid index in every string view, in the order
  /// characters, Unicode scalars, UTF-8, UTF-16.
  ///
  /// Equal indices originating from different views are all kept, because
  /// they may have different grapheme size caches or flags etc.
  ///
  /// - Parameter includingEnd: When `true` (the default), each view's
  ///   `endIndex` is appended right after that view's indices.
  /// - Returns: The concatenated indices of all four views.
  public func allIndices(includingEnd: Bool = true) -> [String.Index] {
    var r = Array(self.indices)
    if includingEnd { r.append(self.endIndex) }
    r.append(contentsOf: self.unicodeScalars.indices)
    if includingEnd { r.append(self.unicodeScalars.endIndex) }
    r.append(contentsOf: self.utf8.indices)
    if includingEnd { r.append(self.utf8.endIndex) }
    r.append(contentsOf: self.utf16.indices)
    if includingEnd { r.append(self.utf16.endIndex) }
    return r
  }
}
832+
extension Substring {
  // Collects every valid index of every view of this substring: characters
  // first, then Unicode scalars, UTF-8 and UTF-16. When `includingEnd` is
  // true, each view's end index follows that view's indices. Indices that
  // compare equal but come from different views are all retained, since they
  // may carry different grapheme size caches or flags etc.
  public func allIndices(includingEnd: Bool = true) -> [String.Index] {
    var collected: [String.Index] = []

    // Appends the indices of one view, plus its end index if requested.
    func gather<View: Collection>(from view: View)
    where View.Index == String.Index {
      collected.append(contentsOf: view.indices)
      if includingEnd {
        collected.append(view.endIndex)
      }
    }

    gather(from: self)
    gather(from: unicodeScalars)
    gather(from: utf8)
    gather(from: utf16)
    return collected
  }
}
849+
extension Collection {
  // Walks `other` index by index and, for each index, hands `body`:
  //   1. that index of `other`,
  //   2. the slice of `self` running from where the previous slice stopped
  //      up to the next index of `other` (or to `self.endIndex`, whichever
  //      comes first), and
  //   3. the zero-based position of the index within `other`.
  //
  // Both collections are assumed to share one index space, and the first
  // index of `other` must be less than or equal to the first index of `self`.
  // If `other` is empty, `self` is asserted to be empty too and `body` is
  // never called.
  func forEachIndexGroup<G: Collection>(
    by other: G,
    body: (G.Index, Self.SubSequence, Int) throws -> Void
  ) rethrows
  where G.Index == Self.Index
  {
    guard !other.isEmpty else {
      assert(self.isEmpty)
      return
    }

    // Position in `self`; it only ever moves forward across groups.
    var cursor = startIndex
    for (ordinal, groupStart) in other.indices.enumerated() {
      let groupEnd = other.index(after: groupStart)
      let sliceStart = cursor
      // Consume elements of `self` up to the next boundary in `other`.
      while cursor < groupEnd && cursor < endIndex {
        cursor = index(after: cursor)
      }
      try body(groupStart, self[sliceStart ..< cursor], ordinal)
    }
  }
}
883+
extension String {
  /// Builds a lookup table from every valid index of this string (across the
  /// UTF-8, UTF-16 and character views) to the Unicode scalar boundary at or
  /// before it.
  ///
  /// Each value pairs that scalar's index with the scalar's zero-based
  /// position in `unicodeScalars`. `endIndex` maps to itself, with the total
  /// scalar count as its offset.
  public func scalarMap() -> [Index: (index: Index, offset: Int)] {
    var table: [Index: (index: Index, offset: Int)] = [:]

    // Records, for every index of `view`, the scalar group it falls into.
    func record<View: Collection>(_ view: View)
    where View.Index == String.Index {
      view.forEachIndexGroup(by: unicodeScalars) { boundary, group, ordinal in
        for position in group.indices {
          table[position] = (index: boundary, offset: ordinal)
        }
      }
    }

    record(utf8)
    record(utf16)
    record(self)
    table[endIndex] = (index: endIndex, offset: unicodeScalars.count)
    return table
  }

  /// Builds a lookup table from every valid index of this string (across the
  /// UTF-8, UTF-16 and Unicode scalar views) to the character boundary at or
  /// before it.
  ///
  /// Each value pairs that character's index with the character's zero-based
  /// position in the string. `endIndex` maps to itself, with the total
  /// character count as its offset.
  public func characterMap() -> [Index: (index: Index, offset: Int)] {
    var table: [Index: (index: Index, offset: Int)] = [:]

    // Records, for every index of `view`, the character group it falls into.
    func record<View: Collection>(_ view: View)
    where View.Index == String.Index {
      view.forEachIndexGroup(by: self) { boundary, group, ordinal in
        for position in group.indices {
          table[position] = (index: boundary, offset: ordinal)
        }
      }
    }

    record(utf8)
    record(utf16)
    record(unicodeScalars)
    table[endIndex] = (index: endIndex, offset: count)
    return table
  }
}
920+
0 commit comments