@@ -587,23 +587,40 @@ impl fmt::Debug for Wtf8 {
587587/// Formats the string with unpaired surrogates substituted with the replacement
588588/// character, U+FFFD.
589589impl fmt:: Display for Wtf8 {
590- fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
591- let wtf8_bytes = & self . bytes ;
592- let mut pos = 0 ;
593- loop {
594- match self . next_surrogate ( pos) {
595- Some ( ( surrogate_pos, _) ) => {
596- formatter. write_str ( unsafe {
597- str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] )
598- } ) ?;
599- formatter. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
600- pos = surrogate_pos + 3 ;
601- }
602- None => {
603- let s = unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } ;
604- if pos == 0 { return s. fmt ( formatter) } else { return formatter. write_str ( s) }
605- }
606- }
590+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
591+ // Corresponds to `Formatter::pad`, but for `Wtf8` instead of `str`.
592+
593+ // Make sure there's a fast path up front.
594+ if f. options ( ) . get_width ( ) . is_none ( ) && f. options ( ) . get_precision ( ) . is_none ( ) {
595+ return self . write_lossy ( f) ;
596+ }
597+
598+ // The `precision` field can be interpreted as a maximum width for the
599+ // string being formatted.
600+ let max_code_point_count = f. options ( ) . get_precision ( ) . unwrap_or ( usize:: MAX ) ;
601+ let mut iter = self . code_points ( ) ;
602+ let code_point_count = iter. by_ref ( ) . take ( max_code_point_count) . count ( ) ;
603+
604+ // If our string is longer than the maximum width, truncate it and
605+ // handle other flags in terms of the truncated string.
606+ let byte_len = self . len ( ) - iter. as_slice ( ) . len ( ) ;
607+ // SAFETY: The index is derived from the offset of `.code_points()`,
608+ // which is guaranteed to be in-bounds and between character boundaries.
609+ let s = unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . get_unchecked ( ..byte_len) ) } ;
610+
611+ // The `width` field is more of a minimum width parameter at this point.
612+ if let Some ( width) = f. options ( ) . get_width ( )
613+ && code_point_count < width
614+ {
615+ // If we're under the minimum width, then fill up the minimum width
616+ // with the specified string + some alignment.
617+ let post_padding = f. padding ( width - code_point_count, fmt:: Alignment :: Left ) ?;
618+ s. write_lossy ( f) ?;
619+ post_padding. write ( f)
620+ } else {
621+ // If we're over the minimum width or there is no minimum width, we
622+ // can just emit the string.
623+ s. write_lossy ( f)
607624 }
608625 }
609626}
@@ -719,6 +736,19 @@ impl Wtf8 {
719736 }
720737 }
721738
739+ /// Writes the string as lossy UTF-8 like [`Wtf8::to_string_lossy`].
740+ /// It ignores formatter flags.
741+ fn write_lossy ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
742+ let wtf8_bytes = & self . bytes ;
743+ let mut pos = 0 ;
744+ while let Some ( ( surrogate_pos, _) ) = self . next_surrogate ( pos) {
745+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] ) } ) ?;
746+ f. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
747+ pos = surrogate_pos + 3 ;
748+ }
749+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } )
750+ }
751+
722752 /// Converts the WTF-8 string to potentially ill-formed UTF-16
723753 /// and return an iterator of 16-bit code units.
724754 ///
@@ -1003,6 +1033,16 @@ impl Iterator for Wtf8CodePoints<'_> {
10031033 }
10041034}
10051035
1036+ impl < ' a > Wtf8CodePoints < ' a > {
1037+ /// Views the underlying data as a subslice of the original data.
1038+ #[ inline]
1039+ pub fn as_slice ( & self ) -> & Wtf8 {
1040+ // SAFETY: `Wtf8CodePoints` is only made from a `Wtf8Str`, which
1041+ // guarantees the iter is valid WTF-8.
1042+ unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . as_slice ( ) ) }
1043+ }
1044+ }
1045+
10061046/// Generates a wide character sequence for potentially ill-formed UTF-16.
10071047#[ stable( feature = "rust1" , since = "1.0.0" ) ]
10081048#[ derive( Clone ) ]
0 commit comments