Skip to content

Commit 7fafac4

Browse files
committed
Return Span of skipped bytes from read_to_end*
1 parent abec80f commit 7fafac4

File tree

7 files changed

+57
-23
lines changed

7 files changed

+57
-23
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
- [#439]: Added utilities `detect_encoding()`, `decode()`, and `decode_with_bom_removal()`
4141
under the `quick-xml::encoding` namespace.
4242
- [#450]: Added support of asynchronous [tokio](https://tokio.rs/) readers
43+
- [#455]: Changed the return type of all `read_to_end*` methods to return a span between tags
4344

4445

4546
### Bug Fixes
@@ -222,6 +223,7 @@
222223
[#440]: https://github.com/tafia/quick-xml/pull/440
223224
[#443]: https://github.com/tafia/quick-xml/pull/443
224225
[#450]: https://github.com/tafia/quick-xml/pull/450
226+
[#455]: https://github.com/tafia/quick-xml/pull/455
225227

226228

227229
## 0.23.0 -- 2022-05-08

src/de/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,8 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
951951
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
952952
match self.reader.read_to_end_into(name, &mut self.buf) {
953953
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
954-
other => Ok(other?),
954+
Err(e) => Err(e.into()),
955+
Ok(_) => Ok(()),
955956
}
956957
}
957958

@@ -991,7 +992,8 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
991992
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
992993
match self.reader.read_to_end(name) {
993994
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
994-
other => Ok(other?),
995+
Err(e) => Err(e.into()),
996+
Ok(_) => Ok(()),
995997
}
996998
}
997999

src/reader/async_tokio.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use tokio::io::{self, AsyncBufRead, AsyncBufReadExt};
99
use crate::events::Event;
1010
use crate::name::{QName, ResolveResult};
1111
use crate::reader::buffered_reader::impl_buffered_source;
12-
use crate::reader::{is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader};
12+
use crate::reader::{
13+
is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader, Span,
14+
};
1315
use crate::{Error, Result};
1416

1517
/// A struct for reading XML asynchronously from an [`AsyncBufRead`].
@@ -125,7 +127,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
125127
/// // First, we read a start event...
126128
/// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start));
127129
///
128-
/// //...then, we could skip all events to the corresponding end event.
130+
/// // ...then, we could skip all events to the corresponding end event.
129131
/// // This call will correctly handle nested <outer> elements.
130132
/// // Note, however, that this method does not handle namespaces.
131133
/// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap();
@@ -142,8 +144,8 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
142144
// We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033
143145
end: QName<'n>,
144146
buf: &mut Vec<u8>,
145-
) -> Result<()> {
146-
read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await)
147+
) -> Result<Span> {
148+
Ok(read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await))
147149
}
148150

149151
/// Read until '<' is found and moves reader to an `Opened` state.
@@ -275,7 +277,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
275277
/// (ResolveResult::Bound(ns), Event::Start(start))
276278
/// );
277279
///
278-
/// //...then, we could skip all events to the corresponding end event.
280+
/// // ...then, we could skip all events to the corresponding end event.
279281
/// // This call will correctly handle nested <outer> elements.
280282
/// // Note, however, that this method does not handle namespaces.
281283
/// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap();
@@ -295,7 +297,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
295297
// We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033
296298
end: QName<'n>,
297299
buf: &mut Vec<u8>,
298-
) -> Result<()> {
300+
) -> Result<Span> {
299301
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
300302
// match literally the start name. See `Reader::check_end_names` documentation
301303
self.reader.read_to_end_into_async(end, buf).await

src/reader/buffered_reader.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use memchr;
1010
use crate::errors::{Error, Result};
1111
use crate::events::Event;
1212
use crate::name::QName;
13-
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource};
13+
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, XmlSource};
1414

1515
macro_rules! impl_buffered_source {
1616
($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => {
@@ -277,6 +277,10 @@ impl<R: BufRead> Reader<R> {
277277
/// storage for events content. This function is supposed to be called after
278278
/// you already read a [`Start`] event.
279279
///
280+
/// Returns a span that covers the content between `>` of an opening tag and `<` of
281+
/// a closing tag, or an empty span if [`expand_empty_elements`] is set and
282+
/// this method was called after reading an expanded [`Start`] event.
283+
///
280284
/// Manages nested cases where parent and child elements have the same name.
281285
///
282286
/// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`]
@@ -340,7 +344,7 @@ impl<R: BufRead> Reader<R> {
340344
/// // First, we read a start event...
341345
/// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start));
342346
///
343-
/// //...then, we could skip all events to the corresponding end event.
347+
/// // ...then, we could skip all events to the corresponding end event.
344348
/// // This call will correctly handle nested <outer> elements.
345349
/// // Note, however, that this method does not handle namespaces.
346350
/// reader.read_to_end_into(end.name(), &mut buf).unwrap();
@@ -353,12 +357,13 @@ impl<R: BufRead> Reader<R> {
353357
/// [`End`]: Event::End
354358
/// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
355359
/// [`read_to_end()`]: Self::read_to_end
360+
/// [`expand_empty_elements`]: Self::expand_empty_elements
356361
/// [`check_end_names`]: Self::check_end_names
357362
/// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
358-
pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<()> {
359-
read_to_end!(self, end, buf, read_event_impl, {
363+
pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
364+
Ok(read_to_end!(self, end, buf, read_event_impl, {
360365
buf.clear();
361-
})
366+
}))
362367
}
363368

364369
/// Reads optional text between start and end tags.

src/reader/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
#[cfg(feature = "encoding")]
44
use encoding_rs::Encoding;
5+
use std::ops::Range;
56

67
use crate::encoding::Decoder;
78
use crate::errors::{Error, Result};
@@ -238,16 +239,18 @@ macro_rules! read_to_end {
238239
$clear:block
239240
$(, $await:ident)?
240241
) => {{
242+
let start = $self.buffer_position();
241243
let mut depth = 0;
242244
loop {
243245
$clear
246+
let end = $self.buffer_position();
244247
match $self.$read_event($buf) $(.$await)? {
245248
Err(e) => return Err(e),
246249

247250
Ok(Event::Start(e)) if e.name() == $end => depth += 1,
248251
Ok(Event::End(e)) if e.name() == $end => {
249252
if depth == 0 {
250-
return Ok(());
253+
break start..end;
251254
}
252255
depth -= 1;
253256
}
@@ -270,6 +273,11 @@ mod slice_reader;
270273

271274
pub use ns_reader::NsReader;
272275

276+
/// Range of input in bytes that corresponds to some piece of XML
277+
pub type Span = Range<usize>;
278+
279+
////////////////////////////////////////////////////////////////////////////////////////////////////
280+
273281
/// Possible reader states. The state transition diagram (`true` and `false` shows
274282
/// value of [`Reader::expand_empty_elements()`] option):
275283
///

src/reader/ns_reader.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::path::Path;
1212
use crate::errors::Result;
1313
use crate::events::Event;
1414
use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
15-
use crate::reader::{Reader, XmlSource};
15+
use crate::reader::{Reader, Span, XmlSource};
1616

1717
/// A low level encoding-agnostic XML event reader that performs namespace resolution.
1818
///
@@ -425,6 +425,10 @@ impl<R: BufRead> NsReader<R> {
425425
/// storage for events content. This function is supposed to be called after
426426
/// you already read a [`Start`] event.
427427
///
428+
/// Returns a span that covers the content between `>` of an opening tag and `<` of
429+
/// a closing tag, or an empty span if [`expand_empty_elements`] is set and
430+
/// this method was called after reading an expanded [`Start`] event.
431+
///
428432
/// Manages nested cases where parent and child elements have the same name
429433
/// ("the same" means that their local names are the same and their prefixes
430434
/// resolves to the same namespace).
@@ -491,7 +495,7 @@ impl<R: BufRead> NsReader<R> {
491495
/// (ResolveResult::Bound(ns), Event::Start(start))
492496
/// );
493497
///
494-
/// //...then, we could skip all events to the corresponding end event.
498+
/// // ...then, we could skip all events to the corresponding end event.
495499
/// // This call will correctly handle nested <outer> elements.
496500
/// // Note, however, that this method does not handle namespaces.
497501
/// reader.read_to_end_into(end.name(), &mut buf).unwrap();
@@ -508,8 +512,9 @@ impl<R: BufRead> NsReader<R> {
508512
/// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
509513
/// [`read_to_end()`]: Self::read_to_end
510514
/// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
515+
/// [`expand_empty_elements`]: Self::expand_empty_elements
511516
#[inline]
512-
pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<()> {
517+
pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
513518
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
514519
// match literally the start name. See `Self::check_end_names` documentation
515520
self.reader.read_to_end_into(end, buf)
@@ -657,6 +662,10 @@ impl<'i> NsReader<&'i [u8]> {
657662
/// Reads until end element is found. This function is supposed to be called
658663
/// after you already read a [`Start`] event.
659664
///
665+
/// Returns a span that covers the content between `>` of an opening tag and `<` of
666+
/// a closing tag, or an empty span if [`expand_empty_elements`] is set and
667+
/// this method was called after reading an expanded [`Start`] event.
668+
///
660669
/// Manages nested cases where parent and child elements have the same name
661670
/// ("the same" means that their local names are the same and their prefixes
662671
/// resolves to the same namespace).
@@ -717,7 +726,7 @@ impl<'i> NsReader<&'i [u8]> {
717726
/// (ResolveResult::Bound(ns), Event::Start(start))
718727
/// );
719728
///
720-
/// //...then, we could skip all events to the corresponding end event.
729+
/// // ...then, we could skip all events to the corresponding end event.
721730
/// // This call will correctly handle nested <outer> elements.
722731
/// // Note, however, that this method does not handle namespaces.
723732
/// reader.read_to_end(end.name()).unwrap();
@@ -734,8 +743,9 @@ impl<'i> NsReader<&'i [u8]> {
734743
/// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof
735744
/// [`read_to_end()`]: Self::read_to_end
736745
/// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
746+
/// [`expand_empty_elements`]: Self::expand_empty_elements
737747
#[inline]
738-
pub fn read_to_end(&mut self, end: QName) -> Result<()> {
748+
pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
739749
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
740750
// match literally the start name. See `Self::check_end_names` documentation
741751
self.reader.read_to_end(end)

src/reader/slice_reader.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use encoding_rs::UTF_8;
1010
use crate::errors::{Error, Result};
1111
use crate::events::Event;
1212
use crate::name::QName;
13-
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource};
13+
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, XmlSource};
1414

1515
use memchr;
1616

@@ -74,6 +74,10 @@ impl<'a> Reader<&'a [u8]> {
7474
/// Reads until end element is found. This function is supposed to be called
7575
/// after you already read a [`Start`] event.
7676
///
77+
/// Returns a span that covers the content between `>` of an opening tag and `<` of
78+
/// a closing tag, or an empty span if [`expand_empty_elements`] is set and
79+
/// this method was called after reading an expanded [`Start`] event.
80+
///
7781
/// Manages nested cases where parent and child elements have the same name.
7882
///
7983
/// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`]
@@ -131,7 +135,7 @@ impl<'a> Reader<&'a [u8]> {
131135
/// // First, we read a start event...
132136
/// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
133137
///
134-
/// //...then, we could skip all events to the corresponding end event.
138+
/// // ...then, we could skip all events to the corresponding end event.
135139
/// // This call will correctly handle nested <outer> elements.
136140
/// // Note, however, that this method does not handle namespaces.
137141
/// reader.read_to_end(end.name()).unwrap();
@@ -143,10 +147,11 @@ impl<'a> Reader<&'a [u8]> {
143147
/// [`Start`]: Event::Start
144148
/// [`End`]: Event::End
145149
/// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
150+
/// [`expand_empty_elements`]: Self::expand_empty_elements
146151
/// [`check_end_names`]: Self::check_end_names
147152
/// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
148-
pub fn read_to_end(&mut self, end: QName) -> Result<()> {
149-
read_to_end!(self, end, (), read_event_impl, {})
153+
pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
154+
Ok(read_to_end!(self, end, (), read_event_impl, {}))
150155
}
151156
}
152157

0 commit comments

Comments
 (0)