|
2 | 2 | //! underlying byte stream. This implementation supports not using an |
3 | 3 | //! intermediate buffer as the byte slice itself can be used to borrow from. |
4 | 4 |
|
| 5 | +use std::borrow::Cow; |
| 6 | + |
5 | 7 | #[cfg(feature = "encoding")] |
6 | 8 | use crate::reader::EncodingRef; |
7 | 9 | #[cfg(feature = "encoding")] |
@@ -153,6 +155,78 @@ impl<'a> Reader<&'a [u8]> { |
153 | 155 | pub fn read_to_end(&mut self, end: QName) -> Result<Span> { |
154 | 156 | Ok(read_to_end!(self, end, (), read_event_impl, {})) |
155 | 157 | } |
| 158 | + |
| 159 | + /// Reads content between start and end tags, including any markup. This |
| 160 | + /// function is supposed to be called after you have already read a [`Start`] event. |
| 161 | + /// |
| 162 | + /// Manages nested cases where parent and child elements have the same name. |
| 163 | + /// |
| 164 | + /// This method does not unescape read data, instead it returns content |
| 165 | + /// "as is" of the XML document. This is because it has no idea what text |
| 166 | + /// it reads, and if, for example, it contains a CDATA section, an attempt to |
| 167 | + /// unescape its content will spoil the data. |
| 168 | + /// |
| 169 | + /// Any text will be decoded using the current XML [`decoder()`]. |
| 170 | + /// |
| 171 | + /// Actually, this method performs the following code: |
| 172 | + /// |
| 173 | + /// ```ignore |
| 174 | + /// let span = reader.read_to_end(end)?; |
| 175 | + /// let text = reader.decoder().decode(&reader.inner_slice[span]); |
| 176 | + /// ``` |
| 177 | + /// |
| 178 | + /// # Examples |
| 179 | + /// |
| 180 | + /// This example shows how you can read HTML content from your XML document. |
| 181 | + /// |
| 182 | + /// ``` |
| 183 | + /// # use pretty_assertions::assert_eq; |
| 184 | + /// # use std::borrow::Cow; |
| 185 | + /// use quick_xml::events::{BytesStart, Event}; |
| 186 | + /// use quick_xml::Reader; |
| 187 | + /// |
| 188 | + /// let mut reader = Reader::from_str(" |
| 189 | + /// <html> |
| 190 | + /// <title>This is a HTML text</title> |
| 191 | + /// <p>Usual XML rules does not apply inside it |
| 192 | + /// <p>For example, elements not needed to be "closed" |
| 193 | + /// </html> |
| 194 | + /// "); |
| 195 | + /// reader.trim_text(true); |
| 196 | + /// |
| 197 | + /// let start = BytesStart::new("html"); |
| 198 | + /// let end = start.to_end().into_owned(); |
| 199 | + /// |
| 200 | + /// // First, we read a start event... |
| 201 | + /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); |
| 202 | + /// // ...and disable checking of end names because we expect HTML further... |
| 203 | + /// reader.check_end_names(false); |
| 204 | + /// |
| 205 | + /// // ...then, we could read text content until close tag. |
| 206 | + /// // This call will correctly handle nested <html> elements. |
| 207 | + /// let text = reader.read_text(end.name()).unwrap(); |
| 208 | + /// assert_eq!(text, Cow::Borrowed(r#" |
| 209 | + /// <title>This is a HTML text</title> |
| 210 | + /// <p>Usual XML rules does not apply inside it |
| 211 | + /// <p>For example, elements not needed to be "closed" |
| 212 | + /// "#)); |
| 213 | + /// |
| 214 | + /// // Now we can enable checks again |
| 215 | + /// reader.check_end_names(true); |
| 216 | + /// |
| 217 | + /// // At the end we should get an Eof event, because we ate the whole XML |
| 218 | + /// assert_eq!(reader.read_event().unwrap(), Event::Eof); |
| 219 | + /// ``` |
| 220 | + /// |
| 221 | + /// [`Start`]: Event::Start |
| 222 | + /// [`decoder()`]: Self::decoder() |
| 223 | + pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> { |
| 224 | + // `read_to_end` changes `self.reader`, so keep the original slice to cut the text from |
| 225 | + let buffer = self.reader; |
| 226 | + let span = self.read_to_end(end)?; |
| 227 | + |
| 228 | + self.decoder().decode(&buffer[0..span.len()]) |
| 229 | + } |
156 | 230 | } |
157 | 231 |
|
158 | 232 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
|
0 commit comments