@@ -2056,6 +2056,32 @@ impl<'a> From<&'a str> for Text<'a> {
20562056 }
20572057}
20582058
/// Raw, undecoded bytes of a text node.
///
/// The reader falls back to this payload when the content of a text event
/// cannot be unescaped: instead of failing, the original bytes are kept as-is
/// and later handed to the visitor as bytes rather than as a string.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Binary<'a> {
    /// The bytes of the node, either borrowed or owned.
    ///
    /// The field is called `text` even though its content is not guaranteed to
    /// be valid text.
    pub text: Cow<'a, [u8]>,
}

impl<'a> Deref for Binary<'a> {
    type Target = [u8];

    /// Gives read-only access to the stored bytes, whether borrowed or owned.
    #[inline]
    fn deref(&self) -> &Self::Target {
        // `Cow<[u8]>` dereferences to `[u8]`, so a plain reborrow is enough.
        &self.text
    }
}

impl<'a> From<&'a [u8]> for Binary<'a> {
    /// Wraps a byte slice without copying it.
    #[inline]
    fn from(text: &'a [u8]) -> Self {
        Self {
            text: Cow::Borrowed(text),
        }
    }
}
2084+
20592085////////////////////////////////////////////////////////////////////////////////////////////////////
20602086
20612087/// Simplified event which contains only these variants that used by deserializer
@@ -2074,6 +2100,8 @@ pub enum DeEvent<'a> {
20742100 /// [`Comment`]: Event::Comment
20752101 /// [`PI`]: Event::PI
20762102 Text ( Text < ' a > ) ,
2103+ /// Undecoded binary content of a text node that could not be unescaped.
2104+ Binary ( Binary < ' a > ) ,
20772105 /// End of XML document.
20782106 Eof ,
20792107}
@@ -2217,7 +2245,11 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22172245 // FIXME: Actually, we should trim after decoding text, but now we trim before
22182246 continue ;
22192247 }
2220- self . drain_text ( e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) ?)
2248+ match e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) . map ( |res| self . drain_text ( res) ) {
2249+ Ok ( x) => x,
2250+ // Unescaping failed: treat the raw bytes as a binary blob.
2251+ Err ( _) => Ok ( DeEvent :: Binary ( Binary { text : e. into_inner ( ) } ) ) ,
2252+ }
22212253 }
22222254 PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
22232255 PayloadEvent :: DocType ( e) => {
@@ -2687,6 +2719,8 @@ where
26872719 fn read_string_impl ( & mut self , allow_start : bool ) -> Result < Cow < ' de , str > , DeError > {
26882720 match self . next ( ) ? {
26892721 DeEvent :: Text ( e) => Ok ( e. text ) ,
2722+ // SAFETY: Binary event should never be emitted for decoded strings.
2723+ DeEvent :: Binary ( e) => unreachable ! ( "{:?}" , e) ,
26902724 // allow one nested level
26912725 DeEvent :: Start ( e) if allow_start => self . read_text ( e. name ( ) ) ,
26922726 DeEvent :: Start ( e) => Err ( DeError :: UnexpectedStart ( e. name ( ) . as_ref ( ) . to_owned ( ) ) ) ,
@@ -2708,10 +2742,12 @@ where
27082742 // The matching tag name is guaranteed by the reader
27092743 DeEvent :: End ( _) => Ok ( e. text ) ,
27102744 // SAFETY: Cannot be two consequent Text events, they would be merged into one
2711- DeEvent :: Text ( _) => unreachable ! ( ) ,
2745+ DeEvent :: Text ( _) | DeEvent :: Binary ( _ ) => unreachable ! ( ) ,
27122746 DeEvent :: Start ( e) => Err ( DeError :: UnexpectedStart ( e. name ( ) . as_ref ( ) . to_owned ( ) ) ) ,
27132747 DeEvent :: Eof => Err ( Error :: missed_end ( name, self . reader . decoder ( ) ) . into ( ) ) ,
27142748 } ,
2749+ // SAFETY: Binary event should never be emitted for decoded strings.
2750+ DeEvent :: Binary ( e) => unreachable ! ( "{:?}" , e) ,
27152751 // We can get End event in case of `<tag></tag>` or `<tag/>` input
27162752 // Return empty text in that case
27172753 // The matching tag name is guaranteed by the reader
@@ -2827,6 +2863,30 @@ where
28272863 }
28282864}
28292865
2866+ impl < ' de , R > Deserializer < ' de , IoReader < R > >
2867+ where
2868+ R : BufRead ,
2869+ {
2870+ /// Create new deserializer that will copy data from the specified reader
2871+ /// into internal buffer.
2872+ ///
2873+ /// If you already have a string use [`Self::from_str`] instead, because it
2874+ /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2875+ /// UTF-8, you can decode it first before using [`from_str`].
2876+ ///
2877+ /// Deserializer created with this method will not resolve custom entities.
2878+ pub fn from_custom_reader ( reader : Reader < R > ) -> Self {
2879+ Self :: new (
2880+ IoReader {
2881+ reader,
2882+ start_trimmer : StartTrimmer :: default ( ) ,
2883+ buf : Vec :: new ( ) ,
2884+ } ,
2885+ PredefinedEntityResolver
2886+ )
2887+ }
2888+ }
2889+
28302890impl < ' de , R , E > Deserializer < ' de , IoReader < R > , E >
28312891where
28322892 R : BufRead ,
@@ -2884,6 +2944,10 @@ where
28842944 Cow :: Borrowed ( s) => visitor. visit_borrowed_str ( s) ,
28852945 Cow :: Owned ( s) => visitor. visit_string ( s) ,
28862946 } ,
2947+ DeEvent :: Binary ( e) => match e. text {
2948+ Cow :: Borrowed ( s) => visitor. visit_borrowed_bytes ( s) ,
2949+ Cow :: Owned ( s) => visitor. visit_byte_buf ( s) ,
2950+ } ,
28872951 DeEvent :: Eof => Err ( DeError :: UnexpectedEof ) ,
28882952 }
28892953 }
@@ -2914,7 +2978,7 @@ where
29142978 self . read_to_end ( s. name ( ) ) ?;
29152979 visitor. visit_unit ( )
29162980 }
2917- DeEvent :: Text ( _) => visitor. visit_unit ( ) ,
2981+ DeEvent :: Text ( _) | DeEvent :: Binary ( _ ) => visitor. visit_unit ( ) ,
29182982 // SAFETY: The reader is guaranteed that we don't have unmatched tags
29192983 // If we here, then out deserializer has a bug
29202984 DeEvent :: End ( e) => unreachable ! ( "{:?}" , e) ,
0 commit comments