@@ -2056,6 +2056,31 @@ impl<'a> From<&'a str> for Text<'a> {
20562056 }
20572057}
20582058
/// Raw, undecoded bytes of a text node.
///
/// The reader produces this instead of a decoded text event when unescaping
/// the content fails, so the payload is handed over exactly as it was read.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Binary<'a> {
    /// The undecoded content, either borrowed from the input or owned.
    pub text: Cow<'a, [u8]>,
}
2065+
2066+ impl < ' a > Deref for Binary < ' a > {
2067+ type Target = [ u8 ] ;
2068+
2069+ #[ inline]
2070+ fn deref ( & self ) -> & Self :: Target {
2071+ self . text . deref ( )
2072+ }
2073+ }
2074+
2075+ impl < ' a > From < & ' a [ u8 ] > for Binary < ' a > {
2076+ #[ inline]
2077+ fn from ( text : & ' a [ u8 ] ) -> Self {
2078+ Self {
2079+ text : Cow :: Borrowed ( text) ,
2080+ }
2081+ }
2082+ }
2083+
20592084////////////////////////////////////////////////////////////////////////////////////////////////////
20602085
20612086/// Simplified event which contains only these variants that used by deserializer
@@ -2074,6 +2099,8 @@ pub enum DeEvent<'a> {
20742099 /// [`Comment`]: Event::Comment
20752100 /// [`PI`]: Event::PI
20762101 Text ( Text < ' a > ) ,
2102+ /// Undecoded binary content.
2103+ Binary ( Binary < ' a > ) ,
20772104 /// End of XML document.
20782105 Eof ,
20792106}
@@ -2217,7 +2244,16 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
22172244 // FIXME: Actually, we should trim after decoding text, but now we trim before
22182245 continue ;
22192246 }
2220- self . drain_text ( e. unescape_with ( |entity| self . entity_resolver . resolve ( entity) ) ?)
2247+ match e
2248+ . unescape_with ( |entity| self . entity_resolver . resolve ( entity) )
2249+ . map ( |res| self . drain_text ( res) )
2250+ {
2251+ Ok ( x) => x,
2252+ // Failed to unescape: treat the content as a binary blob.
2253+ Err ( _) => Ok ( DeEvent :: Binary ( Binary {
2254+ text : e. into_inner ( ) ,
2255+ } ) ) ,
2256+ }
22212257 }
22222258 PayloadEvent :: CData ( e) => self . drain_text ( e. decode ( ) ?) ,
22232259 PayloadEvent :: DocType ( e) => {
@@ -2687,6 +2723,8 @@ where
26872723 fn read_string_impl ( & mut self , allow_start : bool ) -> Result < Cow < ' de , str > , DeError > {
26882724 match self . next ( ) ? {
26892725 DeEvent :: Text ( e) => Ok ( e. text ) ,
2726+ // SAFETY: Binary event should never be emitted for decoded strings.
2727+ DeEvent :: Binary ( e) => unreachable ! ( "{:?}" , e) ,
26902728 // allow one nested level
26912729 DeEvent :: Start ( e) if allow_start => self . read_text ( e. name ( ) ) ,
26922730 DeEvent :: Start ( e) => Err ( DeError :: UnexpectedStart ( e. name ( ) . as_ref ( ) . to_owned ( ) ) ) ,
@@ -2708,10 +2746,12 @@ where
27082746 // The matching tag name is guaranteed by the reader
27092747 DeEvent :: End ( _) => Ok ( e. text ) ,
27102748 // SAFETY: Cannot be two consequent Text events, they would be merged into one
2711- DeEvent :: Text ( _) => unreachable ! ( ) ,
2749+ DeEvent :: Text ( _) | DeEvent :: Binary ( _ ) => unreachable ! ( ) ,
27122750 DeEvent :: Start ( e) => Err ( DeError :: UnexpectedStart ( e. name ( ) . as_ref ( ) . to_owned ( ) ) ) ,
27132751 DeEvent :: Eof => Err ( Error :: missed_end ( name, self . reader . decoder ( ) ) . into ( ) ) ,
27142752 } ,
2753+ // SAFETY: Binary event should never be emitted for decoded strings.
2754+ DeEvent :: Binary ( e) => unreachable ! ( "{:?}" , e) ,
27152755 // We can get End event in case of `<tag></tag>` or `<tag/>` input
27162756 // Return empty text in that case
27172757 // The matching tag name is guaranteed by the reader
@@ -2827,6 +2867,30 @@ where
28272867 }
28282868}
28292869
2870+ impl < ' de , R > Deserializer < ' de , IoReader < R > >
2871+ where
2872+ R : BufRead ,
2873+ {
2874+ /// Create new deserializer that will copy data from the specified reader
2875+ /// into internal buffer.
2876+ ///
2877+ /// If you already have a string use [`Self::from_str`] instead, because it
2878+ /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2879+ /// UTF-8, you can decode it first before using [`from_str`].
2880+ ///
2881+ /// Deserializer created with this method will not resolve custom entities.
2882+ pub fn from_custom_reader ( reader : Reader < R > ) -> Self {
2883+ Self :: new (
2884+ IoReader {
2885+ reader,
2886+ start_trimmer : StartTrimmer :: default ( ) ,
2887+ buf : Vec :: new ( ) ,
2888+ } ,
2889+ PredefinedEntityResolver ,
2890+ )
2891+ }
2892+ }
2893+
28302894impl < ' de , R , E > Deserializer < ' de , IoReader < R > , E >
28312895where
28322896 R : BufRead ,
@@ -2884,6 +2948,10 @@ where
28842948 Cow :: Borrowed ( s) => visitor. visit_borrowed_str ( s) ,
28852949 Cow :: Owned ( s) => visitor. visit_string ( s) ,
28862950 } ,
2951+ DeEvent :: Binary ( e) => match e. text {
2952+ Cow :: Borrowed ( s) => visitor. visit_borrowed_bytes ( s) ,
2953+ Cow :: Owned ( s) => visitor. visit_byte_buf ( s) ,
2954+ } ,
28872955 DeEvent :: Eof => Err ( DeError :: UnexpectedEof ) ,
28882956 }
28892957 }
@@ -2914,7 +2982,7 @@ where
29142982 self . read_to_end ( s. name ( ) ) ?;
29152983 visitor. visit_unit ( )
29162984 }
2917- DeEvent :: Text ( _) => visitor. visit_unit ( ) ,
2985+ DeEvent :: Text ( _) | DeEvent :: Binary ( _ ) => visitor. visit_unit ( ) ,
29182986 // SAFETY: The reader is guaranteed that we don't have unmatched tags
29192987 // If we here, then out deserializer has a bug
29202988 DeEvent :: End ( e) => unreachable ! ( "{:?}" , e) ,
0 commit comments