@@ -435,6 +435,22 @@ extension String.UTF16View: BidirectionalCollection {
435435
436436 return _foreignSubscript ( position: idx)
437437 }
438+
// Returns the UTF-16 code unit located `offset` code units into this view.
//
// The offset is first resolved to an index, that index is checked against
// `_guts.count`, and the scalar containing it is then decoded so the
// requested code unit can be picked out of the scalar's UTF-16 encoding.
//
// NOTE(review): the argument label suggests callers only use this for
// native, non-ASCII storage -- confirm at the call sites.
internal subscript(nativeNonASCIIOffset offset: Int) -> UTF16.CodeUnit {
  @_effects(releasenone) get {
    // Offsets below half a breadcrumb stride are resolved by walking from
    // `startIndex`; that is expected to be cheaper than going through the
    // breadcrumbs, which are only consulted for larger offsets.
    let breadcrumbCutoff = _breadcrumbStride / 2
    let position = offset >= breadcrumbCutoff
      ? _nativeGetIndex(for: offset)
      : _index(startIndex, offsetBy: offset)._knownUTF8

    // The resolved position must lie strictly before the end of storage.
    _precondition(
      position._encodedOffset < _guts.count,
      " String index is out of bounds ")

    // Decode the scalar from its scalar-aligned start offset, then select
    // the code unit named by the index's transcoded offset.
    let scalarStart = _guts.scalarAlign(position)._encodedOffset
    let scalar = _guts.fastUTF8Scalar(startingAt: scalarStart)
    return scalar.utf16[position.transcodedOffset]
  }
}
438454}
439455
440456extension String . UTF16View {
@@ -948,6 +964,21 @@ extension String.UTF16View {
948964 fatalError ( )
949965 }
950966 }
967+
968+ // See _nativeCopy(into:alignedRange:), except this uses un-verified UTF16
969+ // offsets instead of aligned indexes
// `range` is expressed as UTF-16 offsets counted from `startIndex`. The
// offsets are resolved to indices, both resolved bounds are checked against
// `_guts.count`, and the copy itself is delegated to
// `_nativeCopy(into:alignedRange:)`.
internal func _nativeCopy(
  into buffer: UnsafeMutableBufferPointer<UInt16>,
  offsetRange range: Range<Int>
) {
  let resolved = _indexRange(for: range, from: startIndex)
  let first = resolved.lowerBound
  let last = resolved.upperBound

  // Either bound may sit exactly at the end of storage (hence `<=`), but
  // neither may lie beyond it.
  let storageEnd = _guts.count
  _precondition(
    first._encodedOffset <= storageEnd && last._encodedOffset <= storageEnd,
    " String index is out of bounds ")

  unsafe _nativeCopy(into: buffer, alignedRange: first ..< last)
}
951982
952983 // Copy (i.e. transcode to UTF-16) our contents into a buffer. `alignedRange`
953984 // means that the indices are part of the UTF16View.indices -- they are either
@@ -962,16 +993,16 @@ extension String.UTF16View {
962993 range. lowerBound == _utf16AlignNativeIndex ( range. lowerBound) )
963994 _internalInvariant (
964995 range. upperBound == _utf16AlignNativeIndex ( range. upperBound) )
965-
996+
966997 if _slowPath ( range. isEmpty) { return }
967-
998+
968999 let isASCII = _guts. isASCII
9691000 return unsafe _guts. withFastUTF8 { utf8 in
9701001 var writeIdx = 0
9711002 let writeEnd = buffer. count
9721003 var readIdx = range. lowerBound. _encodedOffset
9731004 let readEnd = range. upperBound. _encodedOffset
974-
1005+
9751006 if isASCII {
9761007 _internalInvariant ( range. lowerBound. transcodedOffset == 0 )
9771008 _internalInvariant ( range. upperBound. transcodedOffset == 0 )
@@ -984,7 +1015,7 @@ extension String.UTF16View {
9841015 }
9851016 return
9861017 }
987-
1018+
9881019 // Handle mid-transcoded-scalar initial index
9891020 if _slowPath( range. lowerBound. transcodedOffset != 0 ) {
9901021 _internalInvariant ( range. lowerBound. transcodedOffset == 1 )
@@ -995,7 +1026,7 @@ extension String.UTF16View {
9951026 readIdx &+= len
9961027 writeIdx &+= 1
9971028 }
998-
1029+
9991030 // Transcode middle
10001031 while readIdx < readEnd {
10011032 let ( scalar, len) = unsafe _decodeScalar( utf8, startingAt: readIdx)
@@ -1009,13 +1040,13 @@ extension String.UTF16View {
10091040 writeIdx &+= 1
10101041 }
10111042 }
1012-
1043+
10131044 // Handle mid-transcoded-scalar final index
10141045 if _slowPath ( range. upperBound. transcodedOffset == 1 ) {
10151046 _internalInvariant ( writeIdx < writeEnd)
10161047 let ( scalar, _) = unsafe _decodeScalar( utf8, startingAt: readIdx)
10171048 _internalInvariant ( scalar. utf16. count == 2 )
1018-
1049+
10191050 // Note: this is intentionally not using the _unchecked subscript.
10201051 // (We rely on debug assertions to catch out of bounds access.)
10211052 unsafe buffer[ writeIdx] = scalar. utf16 [ 0 ]
0 commit comments