@@ -137,7 +137,7 @@ extension String.UTF16View: BidirectionalCollection {
137137 /// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
138138 @inlinable @inline ( __always)
139139 public var endIndex : Index { return _guts. endIndex }
140-
140+
141141 @inlinable @inline ( __always)
142142 public func index( after idx: Index ) -> Index {
143143 if _slowPath ( _guts. isForeign) { return _foreignIndex ( after: idx) }
@@ -149,6 +149,7 @@ extension String.UTF16View: BidirectionalCollection {
149149 // TODO: If transcoded is 1, can we just skip ahead 4?
150150
151151 let idx = _utf16AlignNativeIndex ( idx)
152+
152153 let len = _guts. fastUTF8ScalarLength ( startingAt: idx. _encodedOffset)
153154 if len == 4 && idx. transcodedOffset == 0 {
154155 return idx. nextTranscoded
@@ -518,6 +519,105 @@ extension _StringGuts {
518519}
519520
520521extension String . UTF16View {
522+
523+ @inline ( __always)
524+ internal func _utf16Length< U: SIMD , S: SIMD > (
525+ readPtr: inout UnsafeRawPointer ,
526+ endPtr: UnsafeRawPointer ,
527+ unsignedSIMDType: U . Type ,
528+ signedSIMDType: S . Type
529+ ) -> Int where U. Scalar == UInt8 , S. Scalar == Int8 {
530+ var utf16Count = 0
531+
532+ while readPtr + MemoryLayout < U > . stride < endPtr {
533+ //Find the number of continuations (0b10xxxxxx)
534+ let sValue = Builtin . loadRaw ( readPtr. _rawValue) as S
535+ let continuations = S . zero. replacing ( with: S . one, where: sValue .< - 65 + 1 )
536+ let continuationCount = Int ( continuations. wrappedSum ( ) )
537+
538+ //Find the number of 4 byte code points (0b11110xxx)
539+ let uValue = Builtin . loadRaw ( readPtr. _rawValue) as U
540+ let fourBytes = U . zero. replacing ( with: U . one, where: uValue .>= 0b11110000 )
541+ let fourByteCount = Int ( fourBytes. wrappedSum ( ) )
542+
543+ utf16Count &+= ( U . scalarCount - continuationCount) + fourByteCount
544+
545+ readPtr += MemoryLayout< U> . stride
546+ }
547+
548+ return utf16Count
549+ }
550+
551+ @inline ( __always)
552+ internal func _utf16Distance( from start: Index , to end: Index ) -> Int {
553+ _internalInvariant ( end. transcodedOffset == 0 || end. transcodedOffset == 1 )
554+
555+ return ( end. transcodedOffset - start. transcodedOffset) + _guts. withFastUTF8 (
556+ range: start. _encodedOffset ..< end. _encodedOffset
557+ ) { utf8 in
558+ let rawBuffer = UnsafeRawBufferPointer ( utf8)
559+ guard rawBuffer. count > 0 else { return 0 }
560+
561+ var utf16Count = 0
562+ var readPtr = rawBuffer. baseAddress. unsafelyUnwrapped
563+ let initialReadPtr = readPtr
564+ let endPtr = readPtr + rawBuffer. count
565+
566+ //eat leading continuations
567+ while readPtr < endPtr {
568+ let byte = readPtr. load ( as: UInt8 . self)
569+ if !UTF8. isContinuation ( byte) {
570+ break
571+ }
572+ readPtr += 1
573+ }
574+
575+ // TODO: Currently, using SIMD sizes above SIMD8 is slower
576+ // Once that's fixed we should go up to SIMD64 here
577+
578+ utf16Count &+= _utf16Length (
579+ readPtr: & readPtr,
580+ endPtr: endPtr,
581+ unsignedSIMDType: SIMD8< UInt8> . self ,
582+ signedSIMDType: SIMD8< Int8> . self
583+ )
584+
585+ //TO CONSIDER: SIMD widths <8 here
586+
587+ //back up to the start of the current scalar if we may have a trailing
588+ //incomplete scalar
589+ if utf16Count > 0 && UTF8 . isContinuation ( readPtr. load ( as: UInt8 . self) ) {
590+ while readPtr > initialReadPtr && UTF8 . isContinuation ( readPtr. load ( as: UInt8 . self) ) {
591+ readPtr -= 1
592+ }
593+
594+ //The trailing scalar may be incomplete, subtract it out and check below
595+ let byte = readPtr. load ( as: UInt8 . self)
596+ let len = _utf8ScalarLength ( byte)
597+ utf16Count &-= len == 4 ? 2 : 1
598+ if readPtr == initialReadPtr {
599+ //if we backed up all the way and didn't hit a non-continuation, then
600+ //we don't have any complete scalars, and we should bail.
601+ return 0
602+ }
603+ }
604+
605+ //trailing bytes
606+ while readPtr < endPtr {
607+ let byte = readPtr. load ( as: UInt8 . self)
608+ let len = _utf8ScalarLength ( byte)
609+ // if we don't have enough bytes left, we don't have a complete scalar,
610+ // so don't add it to the count.
611+ if readPtr + len <= endPtr {
612+ utf16Count &+= len == 4 ? 2 : 1
613+ }
614+ readPtr += len
615+ }
616+
617+ return utf16Count
618+ }
619+ }
620+
521621 @usableFromInline
522622 @_effects ( releasenone)
523623 internal func _nativeGetOffset( for idx: Index ) -> Int {
@@ -532,9 +632,7 @@ extension String.UTF16View {
532632 let idx = _utf16AlignNativeIndex ( idx)
533633
534634 guard _guts. _useBreadcrumbs ( forEncodedOffset: idx. _encodedOffset) else {
535- // TODO: Generic _distance is still very slow. We should be able to
536- // skip over ASCII substrings quickly
537- return _distance ( from: startIndex, to: idx)
635+ return _utf16Distance ( from: startIndex, to: idx)
538636 }
539637
540638 // Simple and common: endIndex aka `length`.
@@ -544,7 +642,8 @@ extension String.UTF16View {
544642 // Otherwise, find the nearest lower-bound breadcrumb and count from there
545643 let ( crumb, crumbOffset) = breadcrumbsPtr. pointee. getBreadcrumb (
546644 forIndex: idx)
547- return crumbOffset + _distance( from: crumb, to: idx)
645+
646+ return crumbOffset + _utf16Distance( from: crumb, to: idx)
548647 }
549648
550649 @usableFromInline
0 commit comments