@@ -169,6 +169,14 @@ impl fmt::Debug for Wtf8Buf {
169169 }
170170}
171171
172+ /// Formats the string with unpaired surrogates substituted with the replacement
173+ /// character, U+FFFD.
174+ impl fmt:: Display for Wtf8Buf {
175+ fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
176+ fmt:: Display :: fmt ( & * * self , formatter)
177+ }
178+ }
179+
172180impl Wtf8Buf {
173181 /// Creates a new, empty WTF-8 string.
174182 #[ inline]
@@ -564,23 +572,40 @@ impl fmt::Debug for Wtf8 {
564572/// Formats the string with unpaired surrogates substituted with the replacement
565573/// character, U+FFFD.
566574impl fmt:: Display for Wtf8 {
567- fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
568- let wtf8_bytes = & self . bytes ;
569- let mut pos = 0 ;
570- loop {
571- match self . next_surrogate ( pos) {
572- Some ( ( surrogate_pos, _) ) => {
573- formatter. write_str ( unsafe {
574- str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] )
575- } ) ?;
576- formatter. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
577- pos = surrogate_pos + 3 ;
578- }
579- None => {
580- let s = unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } ;
581- if pos == 0 { return s. fmt ( formatter) } else { return formatter. write_str ( s) }
582- }
583- }
575+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
576+ // Corresponds to `Formatter::pad`, but for `Wtf8` instead of `str`.
577+
578+ // Make sure there's a fast path up front.
579+ if f. options ( ) . get_width ( ) . is_none ( ) && f. options ( ) . get_precision ( ) . is_none ( ) {
580+ return self . write_lossy ( f) ;
581+ }
582+
583+ // The `precision` field can be interpreted as a maximum width for the
584+ // string being formatted.
585+ let max_code_point_count = f. options ( ) . get_precision ( ) . unwrap_or ( usize:: MAX ) ;
586+ let mut iter = self . code_points ( ) ;
587+ let code_point_count = iter. by_ref ( ) . take ( max_code_point_count) . count ( ) ;
588+
589+ // If our string is longer than the maximum width, truncate it and
590+ // handle other flags in terms of the truncated string.
591+ let byte_len = self . len ( ) - iter. as_slice ( ) . len ( ) ;
592+ // SAFETY: The index is derived from the offset of `.code_points()`,
593+ // which is guaranteed to be in-bounds and between character boundaries.
594+ let s = unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . get_unchecked ( ..byte_len) ) } ;
595+
596+ // The `width` field is more of a minimum width parameter at this point.
597+ if let Some ( width) = f. options ( ) . get_width ( )
598+ && code_point_count < width
599+ {
600+ // If we're under the minimum width, then fill up the minimum width
601+ // with the specified string + some alignment.
602+ let post_padding = f. padding ( width - code_point_count, fmt:: Alignment :: Left ) ?;
603+ s. write_lossy ( f) ?;
604+ post_padding. write ( f)
605+ } else {
606+ // If we're over the minimum width or there is no minimum width, we
607+ // can just emit the string.
608+ s. write_lossy ( f)
584609 }
585610 }
586611}
@@ -696,6 +721,19 @@ impl Wtf8 {
696721 }
697722 }
698723
724+ /// Writes the string as lossy UTF-8 like [`Wtf8::to_string_lossy`].
725+ /// It ignores formatter flags.
726+ fn write_lossy ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
727+ let wtf8_bytes = & self . bytes ;
728+ let mut pos = 0 ;
729+ while let Some ( ( surrogate_pos, _) ) = self . next_surrogate ( pos) {
730+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] ) } ) ?;
731+ f. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
732+ pos = surrogate_pos + 3 ;
733+ }
734+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } )
735+ }
736+
699737 /// Converts the WTF-8 string to potentially ill-formed UTF-16
700738 /// and return an iterator of 16-bit code units.
701739 ///
@@ -980,6 +1018,16 @@ impl Iterator for Wtf8CodePoints<'_> {
9801018 }
9811019}
9821020
1021+ impl < ' a > Wtf8CodePoints < ' a > {
1022+ /// Views the underlying data as a subslice of the original data.
1023+ #[ inline]
1024+ pub fn as_slice ( & self ) -> & Wtf8 {
1025+ // SAFETY: `Wtf8CodePoints` is only made from a `Wtf8Str`, which
1026+ // guarantees the iter is valid WTF-8.
1027+ unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . as_slice ( ) ) }
1028+ }
1029+ }
1030+
9831031/// Generates a wide character sequence for potentially ill-formed UTF-16.
9841032#[ stable( feature = "rust1" , since = "1.0.0" ) ]
9851033#[ derive( Clone ) ]
0 commit comments