@@ -2005,7 +2005,7 @@ use crate::{
20052005 errors:: Error ,
20062006 events:: { BytesCData , BytesEnd , BytesStart , BytesText , Event } ,
20072007 name:: QName ,
2008- reader:: Reader ,
2008+ reader:: { Config , Reader } ,
20092009} ;
20102010use serde:: de:: { self , Deserialize , DeserializeOwned , DeserializeSeed , SeqAccess , Visitor } ;
20112011use std:: borrow:: Cow ;
@@ -2168,6 +2168,31 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve
21682168 entity_resolver : E ,
21692169}
21702170
/// Applies `trim` to the text held by `value`, keeping the `Cow` variant.
///
/// A borrowed value is re-sliced without allocating. An owned value is
/// returned untouched when `trim` removed nothing (same length), otherwise
/// it is replaced by a fresh `String` holding only the trimmed text.
fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str>
where
    F: FnOnce(&str) -> &str,
{
    let owned = match value {
        // Borrowed input: the trimmed slice still points into the source.
        Cow::Borrowed(text) => return Cow::Borrowed(trim(text)),
        Cow::Owned(owned) => owned,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was removed, so the existing allocation is reused as is.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_owned())
    }
}
2186+
/// Removes trailing whitespace from `s` in place.
///
/// Whitespace is whatever [`str::trim_end`] strips, i.e. Unicode
/// `White_Space` characters.
/// NOTE(review): this is a wider set than the four XML whitespace
/// characters (space, tab, CR, LF) — confirm that is intended.
///
/// The `Cow` variant is preserved: a borrowed value is re-sliced and an
/// owned value is truncated, so no new allocation is made in either case.
///
/// Returns `true` if the content is empty after trimming.
fn inplace_trim_end(s: &mut Cow<'_, str>) -> bool {
    match s {
        Cow::Borrowed(text) => *text = text.trim_end(),
        Cow::Owned(text) => {
            // `trim_end` keeps a prefix of the string, so its length is a
            // valid char boundary to truncate at.
            let kept = text.trim_end().len();
            text.truncate(kept);
        }
    }
    s.is_empty()
}
2195+
21712196impl < ' i , R : XmlRead < ' i > , E : EntityResolver > XmlReader < ' i , R , E > {
21722197 fn new ( mut reader : R , entity_resolver : E ) -> Self {
21732198 // Lookahead by one event immediately, so we do not need to check in the
@@ -2206,19 +2231,22 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22062231 /// occurs. Content of all events would be appended to `result` and returned
22072232 /// as [`DeEvent::Text`].
22082233 ///
2234+ /// If the resulting text is empty, this function returns `None` to avoid creating an empty event.
2235+ ///
22092236 /// [`Text`]: PayloadEvent::Text
22102237 /// [`CData`]: PayloadEvent::CData
2211- fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < DeEvent < ' i > , DeError > {
2238+ fn drain_text ( & mut self , mut result : Cow < ' i , str > ) -> Result < Option < DeEvent < ' i > > , DeError > {
22122239 loop {
22132240 if self . current_event_is_last_text ( ) {
22142241 break ;
22152242 }
2216-
22172243 match self . next_impl ( ) ? {
22182244 PayloadEvent :: Text ( mut e) => {
22192245 if self . current_event_is_last_text ( ) {
22202246 // FIXME: Actually, we should trim after decoding text, but now we trim before
2221- e. inplace_trim_end ( ) ;
2247+ if self . reader . config ( ) . trim_text_end {
2248+ e. inplace_trim_end ( ) ;
2249+ }
22222250 }
22232251 result
22242252 . to_mut ( )
@@ -2227,10 +2255,12 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22272255 PayloadEvent :: CData ( e) => result. to_mut ( ) . push_str ( & e. decode ( ) ?) ,
22282256
22292257 // SAFETY: current_event_is_last_text checks that event is Text or CData
2230- _ => unreachable ! ( "Only `Text` and `CData` events can come here" ) ,
2258+ e => {
2259+ unreachable ! ( "Only `Text` and `CData` events can come here: {:?}" , & e) ;
2260+ }
22312261 }
22322262 }
2233- Ok ( DeEvent :: Text ( Text { text : result } ) )
2263+ Ok ( Some ( DeEvent :: Text ( Text { text : result } ) ) )
22342264 }
22352265
22362266 /// Return an input-borrowing event.
@@ -2240,22 +2270,29 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22402270 PayloadEvent :: Start ( e) => Ok ( DeEvent :: Start ( e) ) ,
22412271 PayloadEvent :: End ( e) => Ok ( DeEvent :: End ( e) ) ,
22422272 PayloadEvent :: Text ( mut e) => {
2243- if self . current_event_is_last_text ( ) && e. inplace_trim_end ( ) {
2244- // FIXME: Actually, we should trim after decoding text, but now we trim before
2245- continue ;
2273+ if self . current_event_is_last_text ( ) {
2274+ if self . reader . config ( ) . trim_text_end && e. inplace_trim_end ( ) {
2275+ continue ;
2276+ }
22462277 }
2278+
22472279 match e
22482280 . unescape_with ( |entity| self . entity_resolver . resolve ( entity) )
22492281 . map ( |res| self . drain_text ( res) )
22502282 {
2251- Ok ( x) => x,
2283+ Ok ( Ok ( None ) ) => continue ,
2284+ Ok ( Ok ( Some ( x) ) ) => Ok ( x) ,
2285+ Ok ( Err ( x) ) => Err ( x) ,
22522286 // failed to escape treat as binary blob.
22532287 Err ( _) => Ok ( DeEvent :: Binary ( Binary {
22542288 text : e. into_inner ( ) ,
22552289 } ) ) ,
22562290 }
22572291 }
2258- PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
2292+ PayloadEvent :: CData ( e) => match self . drain_text ( e. decode ( ) ?) . transpose ( ) {
2293+ None => continue ,
2294+ Some ( x) => x,
2295+ } ,
22592296 PayloadEvent :: DocType ( e) => {
22602297 self . entity_resolver
22612298 . capture ( e)
@@ -2838,6 +2875,8 @@ where
28382875 pub fn from_str_with_resolver ( source : & ' de str , entity_resolver : E ) -> Self {
28392876 let mut reader = Reader :: from_str ( source) ;
28402877 let config = reader. config_mut ( ) ;
2878+ config. trim_text_start = true ;
2879+ config. trim_text_end = true ;
28412880 config. expand_empty_elements = true ;
28422881
28432882 Self :: new (
@@ -3139,6 +3178,9 @@ pub trait XmlRead<'i> {
31393178
31403179 /// A copy of the reader's decoder used to decode strings.
31413180 fn decoder ( & self ) -> Decoder ;
3181+
3182+ /// Returns a reference to the reader config.
3183+ fn config ( & self ) -> & Config ;
31423184}
31433185
31443186/// XML input source that reads from a std::io input stream.
@@ -3208,6 +3250,10 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
32083250 fn decoder ( & self ) -> Decoder {
32093251 self . reader . decoder ( )
32103252 }
3253+
3254+ fn config ( & self ) -> & Config {
3255+ self . reader . config ( )
3256+ }
32113257}
32123258
32133259/// XML input source that reads from a slice of bytes and can borrow from it.
@@ -3273,6 +3319,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
32733319 fn decoder ( & self ) -> Decoder {
32743320 self . reader . decoder ( )
32753321 }
3322+
3323+ fn config ( & self ) -> & Config {
3324+ self . reader . config ( )
3325+ }
32763326}
32773327
32783328#[ cfg( test) ]
0 commit comments