@@ -347,79 +347,55 @@ extension _StringGuts {
347347 @inline ( __always)
348348 internal func ensureMatchingEncoding( _ i: Index ) -> Index {
349349 if _fastPath ( hasMatchingEncoding ( i) ) { return i }
350- if let i = _slowEnsureMatchingEncoding ( i) { return i }
351- // Note that this trap is not guaranteed to trigger when the process
352- // includes client binaries compiled with a previous Swift release.
353- // (`i._canBeUTF16` can sometimes return true in that case even if the index
354- // actually came from an UTF-8 string.) However, the trap will still often
355- // trigger in this case, as long as the index was initialized by code that
356- // was compiled with 5.7+.
357- //
358- // This trap will rarely if ever trigger on OSes that have stdlibs <= 5.6,
359- // because those versions never set the `isKnownUTF16` flag in
360- // `_StringObject`. (The flag may still be set within inlinable code,
361- // though.)
362- _preconditionFailure ( " Invalid string index " )
363- }
364-
365- /// Return an index that corresponds to the same position as `i`, but whose
366- /// encoding can be assumed to match that of `self`, returning `nil` if `i`
367- /// has incompatible encoding.
368- ///
369- /// If `i` is UTF-8 encoded, but `self` is an UTF-16 string, then return nil.
370- ///
371- /// If `i` is UTF-16 encoded, but `self` is an UTF-8 string, then transcode
372- /// `i`'s offset to UTF-8 and return the resulting index. This allows the use
373- /// of indices from a bridged Cocoa string after the string has been converted
374- /// to a native Swift string. (Such indices are technically still considered
375- /// invalid, but we allow this specific case to keep compatibility with
376- /// existing code that assumes otherwise.)
377- ///
378- /// Detecting an encoding mismatch isn't always possible -- older binaries did
379- /// not set the flags that this method relies on. However, false positives
380- /// cannot happen: if this method detects a mismatch, then it is guaranteed to
381- /// be a real one.
382- internal func ensureMatchingEncodingNoTrap( _ i: Index ) -> Index ? {
383- if hasMatchingEncoding ( i) { return i }
384350 return _slowEnsureMatchingEncoding ( i)
385351 }
386352
387353 @_alwaysEmitIntoClient
388354 @inline ( never)
389355 @_effects ( releasenone)
390- internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index ? {
391- guard isUTF8 else {
392- // Attempt to use an UTF-8 index on a UTF-16 string. Strings don't usually
393- // get converted to UTF-16 storage, so it seems okay to reject this case
394- // -- the index most likely comes from an unrelated string. (This may
395- // still turn out to affect binary compatibility with broken code in
396- // existing binaries running with new stdlibs. If so, we can replace this
397- // with the same transcoding hack as in the UTF-16->8 case below.)
398- return nil
356+ internal func _slowEnsureMatchingEncoding( _ i: Index ) -> Index {
357+ // Attempt to recover from mismatched encodings between a string and its
358+ // index.
359+
360+ if isUTF8 {
361+ // Attempt to use a UTF-16 index on a UTF-8 string.
362+ //
363+ // This can happen if `self` was originally verbatim-bridged, and someone
364+ // mistakenly attempts to keep using an old index after a mutation. This
365+ // is technically an error, but trapping here would trigger a lot of
366+ // broken code that previously happened to work "fine" on e.g. ASCII
367+ // strings. Instead, attempt to convert the offset to UTF-8 code units by
368+ // transcoding the string. This can be slow, but it often results in a
369+ // usable index, even if non-ASCII characters are present. (UTF-16
370+ // breadcrumbs help reduce the severity of the slowdown.)
371+
372+ // FIXME: Consider emitting a runtime warning here.
373+ // FIXME: Consider performing a linked-on-or-after check & trapping if the
374+ // client executable was built on some particular future Swift release.
375+ let utf16 = String . UTF16View ( self )
376+ var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
377+ if i. transcodedOffset != 0 {
378+ r = r. encoded ( offsetBy: i. transcodedOffset)
379+ } else {
380+ // Preserve alignment bits if possible.
381+ r = r. _copyingAlignment ( from: i)
382+ }
383+ return r. _knownUTF8
399384 }
400- // Attempt to use an UTF-16 index on a UTF-8 string.
401- //
402- // This can happen if `self` was originally verbatim-bridged, and someone
403- // mistakenly attempts to keep using an old index after a mutation. This is
404- // technically an error, but trapping here would trigger a lot of broken
405- // code that previously happened to work "fine" on e.g. ASCII strings.
406- // Instead, attempt to convert the offset to UTF-8 code units by transcoding
407- // the string. This can be slow, but it often results in a usable index,
408- // even if non-ASCII characters are present. (UTF-16 breadcrumbs help reduce
409- // the severity of the slowdown.)
410-
411- // FIXME: Consider emitting a runtime warning here.
412- // FIXME: Consider performing a linked-on-or-after check & trapping if the
413- // client executable was built on some particular future Swift release.
414- let utf16 = String . UTF16View ( self )
415- var r = utf16. index ( utf16. startIndex, offsetBy: i. _encodedOffset)
385+
386+ // Attempt to use a UTF-8 index on a UTF-16 string. This is rarer, but it
387+ // can still happen when, e.g., people apply an index they got from an
388+ // `AttributedString` to the original (bridged) string that they constructed
389+ // it from.
390+ let utf8 = String . UTF8View ( self )
391+ var r = utf8. index ( utf8. startIndex, offsetBy: i. _encodedOffset)
416392 if i. transcodedOffset != 0 {
417393 r = r. encoded ( offsetBy: i. transcodedOffset)
418394 } else {
419395 // Preserve alignment bits if possible.
420396 r = r. _copyingAlignment ( from: i)
421397 }
422- return r. _knownUTF8
398+ return r. _knownUTF16
423399 }
424400}
425401
0 commit comments