@@ -302,19 +302,13 @@ extension _StringGuts {
302302// Encoding
303303extension _StringGuts {
/// Reports whether this string's storage representation is UTF-8.
///
/// A `false` result means the string is encoded in UTF-16 instead.
///
/// The value is forwarded from `_object` and always corresponds to the
/// string's actual encoding.
@_alwaysEmitIntoClient
@inline(__always)
internal var isUTF8: Bool {
  get { return _object.isUTF8 }
}
310311
311- /// Returns whether this string has a UTF-16 storage representation.
312- ///
313- /// This always returns a value corresponding to the string's actual encoding.
314- @_alwaysEmitIntoClient
315- @inline ( __always)
316- internal var isUTF16 : Bool { _object. isUTF16 }
317-
318312 @_alwaysEmitIntoClient // Swift 5.7
319313 @inline ( __always)
320314 internal func markEncoding( _ i: String . Index ) -> String . Index {
@@ -334,41 +328,75 @@ extension _StringGuts {
334328 i. _hasMatchingEncoding ( isUTF8: isUTF8)
335329 }
336330
/// Returns an index that can be assumed to share the encoding of `self`,
/// trapping when `i` has an incompatible encoding.
///
/// - A UTF-8 index used with a UTF-16 string is rejected with a trap.
/// - A UTF-16 index used with a UTF-8 string is accepted: its offset is
///   transcoded to UTF-8 and the converted index is returned. This keeps
///   indices taken from a bridged Cocoa string usable after the string has
///   been converted to a native Swift string. (Such indices are technically
///   still invalid; this one case is tolerated to stay compatible with
///   existing code that assumes otherwise.)
///
/// A mismatch cannot always be detected, because older binaries did not set
/// the flags this method relies on. False positives cannot happen, though:
/// any mismatch detected here is guaranteed to be a real one.
@_alwaysEmitIntoClient
@inline(__always)
internal func ensureMatchingEncoding(_ i: Index) -> Index {
  // Common case: the index already matches this string's encoding.
  if _fastPath(hasMatchingEncoding(i)) { return i }

  guard let converted = _slowEnsureMatchingEncoding(i) else {
    // This trap is not guaranteed to fire when the process contains client
    // binaries built with an earlier Swift release: `i._canBeUTF16` may
    // report true there even for an index that really came from a UTF-8
    // string. It will still fire often, provided the index was initialized
    // by code compiled with 5.7+.
    //
    // On OSes whose stdlib is <= 5.6 the trap will rarely, if ever, fire,
    // since those versions never set the `isKnownUTF16` flag in
    // `_StringObject`. (Inlinable code may still set the flag, though.)
    _preconditionFailure(" Invalid string index ")
  }
  return converted
}
365+
/// Returns an index for the same position as `i` whose encoding can be
/// assumed to match that of `self`, or `nil` when `i` has an incompatible
/// encoding.
///
/// - A UTF-8 index used with a UTF-16 string yields `nil`.
/// - A UTF-16 index used with a UTF-8 string is accepted: its offset is
///   transcoded to UTF-8 and the converted index is returned. This keeps
///   indices taken from a bridged Cocoa string usable after the string has
///   been converted to a native Swift string. (Such indices are technically
///   still invalid; this one case is tolerated to stay compatible with
///   existing code that assumes otherwise.)
///
/// A mismatch cannot always be detected, because older binaries did not set
/// the flags this method relies on. False positives cannot happen, though:
/// any mismatch detected here is guaranteed to be a real one.
internal func ensureMatchingEncodingNoTrap(_ i: Index) -> Index? {
  guard hasMatchingEncoding(i) else {
    // Encodings differ: defer to the out-of-line path, which either
    // produces a converted index or reports failure with `nil`.
    return _slowEnsureMatchingEncoding(i)
  }
  return i
}
349387
350388 @_alwaysEmitIntoClient
351389 @inline ( never)
352390 @_effects ( releasenone)
353- internal func _slowEnsureMatchingEncoding( _ i: String . Index ) -> String . Index {
391+ internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index ? {
354392 guard isUTF8 else {
355393 // Attempt to use a UTF-8 index on a UTF-16 string. Strings don't usually
356- // get converted to UTF-16 storage, so it seems okay to trap in this case
357- // -- the index most likely comes from an unrelated string. (Trapping here
358- // may still turn out to affect binary compatibility with broken code in
394+ // get converted to UTF-16 storage, so it seems okay to reject this case
395+ // -- the index most likely comes from an unrelated string. (This may
396+ // still turn out to affect binary compatibility with broken code in
359397 // existing binaries running with new stdlibs. If so, we can replace this
360398 // with the same transcoding hack as in the UTF-16->8 case below.)
361- //
362- // Note that this trap is not guaranteed to trigger when the process
363- // includes client binaries compiled with a previous Swift release.
364- // (`i._canBeUTF16` can sometimes return true in that case even if the
365- // index actually came from an UTF-8 string.) However, the trap will still
366- // often trigger in this case, as long as the index was initialized by
367- // code that was compiled with 5.7+.
368- //
369- // This trap can never trigger on OSes that have stdlibs <= 5.6, because
370- // those versions never set the `isKnownUTF16` flag in `_StringObject`.
371- _preconditionFailure ( " Invalid string index " )
399+ return nil
372400 }
373401 // Attempt to use a UTF-16 index on a UTF-8 string.
374402 //
@@ -384,10 +412,15 @@ extension _StringGuts {
384412 // FIXME: Consider emitting a runtime warning here.
385413 // FIXME: Consider performing a linked-on-or-after check & trapping if the
386414 // client executable was built on some particular future Swift release.
387- let utf16 = String ( self ) . utf16
388- let base = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
389- if i. transcodedOffset == 0 { return base }
390- return base. encoded ( offsetBy: i. transcodedOffset) . _knownUTF8
415+ let utf16 = String . UTF16View ( self )
416+ var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
417+ if i. transcodedOffset != 0 {
418+ r = r. encoded ( offsetBy: i. transcodedOffset)
419+ } else {
420+ // Preserve alignment bits if possible.
421+ r = r. _copyingAlignment ( from: i)
422+ }
423+ return r. _knownUTF8
391424 }
392425}
393426
0 commit comments