@@ -301,19 +301,13 @@ extension _StringGuts {
301301// Encoding
302302extension _StringGuts {
303303 /// Returns whether this string has a UTF-8 storage representation.
304+ /// If this returns false, then the string is encoded in UTF-16.
304305 ///
305306 /// This always returns a value corresponding to the string's actual encoding.
306307 @_alwaysEmitIntoClient
307308 @inline ( __always)
308309 internal var isUTF8 : Bool { _object. isUTF8 }
309310
310- /// Returns whether this string has a UTF-16 storage representation.
311- ///
312- /// This always returns a value corresponding to the string's actual encoding.
313- @_alwaysEmitIntoClient
314- @inline ( __always)
315- internal var isUTF16 : Bool { _object. isUTF16 }
316-
317311 @_alwaysEmitIntoClient // Swift 5.7
318312 @inline ( __always)
319313 internal func markEncoding( _ i: String . Index ) -> String . Index {
@@ -333,41 +327,75 @@ extension _StringGuts {
333327 i. _hasMatchingEncoding ( isUTF8: isUTF8)
334328 }
335329
336- /// Return an index whose encoding can be assumed to match that of `self`.
330+ /// Return an index whose encoding can be assumed to match that of `self`,
331+ /// trapping if `i` has an incompatible encoding.
332+ ///
333+ /// If `i` is UTF-8 encoded, but `self` is a UTF-16 string, then trap.
334+ ///
335+ /// If `i` is UTF-16 encoded, but `self` is a UTF-8 string, then transcode
336+ /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
337+ /// of indices from a bridged Cocoa string after the string has been converted
338+ /// to a native Swift string. (Such indices are technically still considered
339+ /// invalid, but we allow this specific case to keep compatibility with
340+ /// existing code that assumes otherwise.)
337341 ///
338342 /// Detecting an encoding mismatch isn't always possible -- older binaries did
339343 /// not set the flags that this method relies on. However, false positives
340344 /// cannot happen: if this method detects a mismatch, then it is guaranteed to
341345 /// be a real one.
342346 @_alwaysEmitIntoClient
343347 @inline ( __always)
344- internal func ensureMatchingEncoding( _ i: String . Index ) -> String . Index {
348+ internal func ensureMatchingEncoding( _ i: Index ) -> Index {
345349 if _fastPath ( hasMatchingEncoding ( i) ) { return i }
350+ if let i = _slowEnsureMatchingEncoding ( i) { return i }
351+ // Note that this trap is not guaranteed to trigger when the process
352+ // includes client binaries compiled with a previous Swift release.
353+ // (`i._canBeUTF16` can sometimes return true in that case even if the index
354+ // actually came from a UTF-8 string.) However, the trap will still often
355+ // trigger in this case, as long as the index was initialized by code that
356+ // was compiled with 5.7+.
357+ //
358+ // This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
359+ // because those versions never set the `isKnownUTF16` flag in
360+ // `_StringObject`. (The flag may still be set within inlinable code,
361+ // though.)
362+ _preconditionFailure ( " Invalid string index " )
363+ }
364+
365+ /// Return an index that corresponds to the same position as `i`, but whose
366+ /// encoding can be assumed to match that of `self`, returning `nil` if `i`
367+ /// has an incompatible encoding.
368+ ///
369+ /// If `i` is UTF-8 encoded, but `self` is a UTF-16 string, then return `nil`.
370+ ///
371+ /// If `i` is UTF-16 encoded, but `self` is a UTF-8 string, then transcode
372+ /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
373+ /// of indices from a bridged Cocoa string after the string has been converted
374+ /// to a native Swift string. (Such indices are technically still considered
375+ /// invalid, but we allow this specific case to keep compatibility with
376+ /// existing code that assumes otherwise.)
377+ ///
378+ /// Detecting an encoding mismatch isn't always possible -- older binaries did
379+ /// not set the flags that this method relies on. However, false positives
380+ /// cannot happen: if this method detects a mismatch, then it is guaranteed to
381+ /// be a real one.
382+ internal func ensureMatchingEncodingNoTrap( _ i: Index ) -> Index ? {
383+ if hasMatchingEncoding ( i) { return i }
346384 return _slowEnsureMatchingEncoding ( i)
347385 }
348386
349387 @_alwaysEmitIntoClient
350388 @inline ( never)
351389 @_effects ( releasenone)
352- internal func _slowEnsureMatchingEncoding( _ i: String . Index ) -> String . Index {
390+ internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index ? {
353391 guard isUTF8 else {
354392 // Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
355- // get converted to UTF-16 storage, so it seems okay to trap in this case
356- // -- the index most likely comes from an unrelated string. (Trapping here
357- // may still turn out to affect binary compatibility with broken code in
393+ // get converted to UTF-16 storage, so it seems okay to reject this case
394+ // -- the index most likely comes from an unrelated string. (This may
395+ // still turn out to affect binary compatibility with broken code in
358396 // existing binaries running with new stdlibs. If so, we can replace this
359397 // with the same transcoding hack as in the UTF-16->8 case below.)
360- //
361- // Note that this trap is not guaranteed to trigger when the process
362- // includes client binaries compiled with a previous Swift release.
363- // (`i._canBeUTF16` can sometimes return true in that case even if the
364- // index actually came from an UTF-8 string.) However, the trap will still
365- // often trigger in this case, as long as the index was initialized by
366- // code that was compiled with 5.7+.
367- //
368- // This trap can never trigger on OSes that have stdlibs <= 5.6, because
369- // those versions never set the `isKnownUTF16` flag in `_StringObject`.
370- _preconditionFailure ( " Invalid string index " )
398+ return nil
371399 }
372400 // Attempt to use an UTF-16 index on a UTF-8 string.
373401 //
@@ -383,10 +411,15 @@ extension _StringGuts {
383411 // FIXME: Consider emitting a runtime warning here.
384412 // FIXME: Consider performing a linked-on-or-after check & trapping if the
385413 // client executable was built on some particular future Swift release.
386- let utf16 = String ( self ) . utf16
387- let base = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
388- if i. transcodedOffset == 0 { return base }
389- return base. encoded ( offsetBy: i. transcodedOffset) . _knownUTF8
414+ let utf16 = String . UTF16View ( self )
415+ var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
416+ if i. transcodedOffset != 0 {
417+ r = r. encoded ( offsetBy: i. transcodedOffset)
418+ } else {
419+ // Preserve alignment bits if possible.
420+ r = r. _copyingAlignment ( from: i)
421+ }
422+ return r. _knownUTF8
390423 }
391424}
392425
0 commit comments