Skip to content

Commit 6a52ecc

Browse files
Mingundralley
andcommitted
seq: Split sequence accessors for top-level access and in-map access
The following tests were fixed (6): seq::variable_name::variable_size::field_after_list::after seq::variable_name::variable_size::field_before_list::after seq::variable_name::variable_size::two_lists::choice_and_fixed::fixed_after seq::variable_name::variable_size::two_lists::choice_and_fixed::fixed_before seq::variable_name::variable_size::two_lists::fixed_and_choice::fixed_after seq::variable_name::variable_size::two_lists::fixed_and_choice::fixed_before failures (20): seq::fixed_name::fixed_size::field_after_list::overlapped seq::fixed_name::fixed_size::field_before_list::overlapped seq::fixed_name::fixed_size::two_lists::overlapped seq::fixed_name::fixed_size::unknown_items::overlapped seq::fixed_name::variable_size::field_after_list::overlapped seq::fixed_name::variable_size::field_before_list::overlapped seq::fixed_name::variable_size::two_lists::overlapped seq::fixed_name::variable_size::unknown_items::overlapped seq::variable_name::fixed_size::field_after_list::overlapped seq::variable_name::fixed_size::field_before_list::overlapped seq::variable_name::fixed_size::two_lists::choice_and_fixed::overlapped_fixed_after seq::variable_name::fixed_size::two_lists::choice_and_fixed::overlapped_fixed_before seq::variable_name::fixed_size::two_lists::fixed_and_choice::overlapped_fixed_after seq::variable_name::fixed_size::two_lists::fixed_and_choice::overlapped_fixed_before seq::variable_name::variable_size::field_after_list::overlapped seq::variable_name::variable_size::field_before_list::overlapped seq::variable_name::variable_size::two_lists::choice_and_fixed::overlapped_fixed_after seq::variable_name::variable_size::two_lists::choice_and_fixed::overlapped_fixed_before seq::variable_name::variable_size::two_lists::fixed_and_choice::overlapped_fixed_after seq::variable_name::variable_size::two_lists::fixed_and_choice::overlapped_fixed_before Co-authored-by: Daniel Alley <dalley@redhat.com>
1 parent 55f541b commit 6a52ecc

File tree

3 files changed

+200
-19
lines changed

3 files changed

+200
-19
lines changed

src/de/map.rs

Lines changed: 184 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
33
use crate::{
44
de::escape::EscapedDeserializer,
5-
de::seq::not_in,
5+
de::seq::{not_in, TagFilter},
66
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
77
errors::serialize::DeError,
88
events::attributes::IterState,
99
events::{BytesCData, BytesStart},
1010
reader::Decoder,
1111
};
12-
use serde::de::{self, DeserializeSeed, IntoDeserializer, Visitor};
12+
use serde::de::{self, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor};
1313
use serde::serde_if_integer128;
1414
use std::borrow::Cow;
1515
use std::ops::Range;
@@ -312,23 +312,32 @@ where
312312
// is implicit and equals to the `INNER_VALUE` constant, and the value
313313
// is a `Text` or a `CData` event (the value deserializer will see one
314314
// of that events)
315-
ValueSource::Text => seed.deserialize(MapValueDeserializer { map: self }),
315+
ValueSource::Text => seed.deserialize(MapValueDeserializer {
316+
map: self,
317+
allow_start: false,
318+
}),
316319
// This arm processes the following XML shape:
317320
// <any-tag>
318321
// <any>...</any>
319322
// </any-tag>
320323
// The whole map represented by an `<any-tag>` element, the map key
321324
// is implicit and equals to the `INNER_VALUE` constant, and the value
322325
// is a `Start` event (the value deserializer will see that event)
323-
ValueSource::Content => seed.deserialize(MapValueDeserializer { map: self }),
326+
ValueSource::Content => seed.deserialize(MapValueDeserializer {
327+
map: self,
328+
allow_start: false,
329+
}),
324330
// This arm processes the following XML shape:
325331
// <any-tag>
326332
// <tag>...</tag>
327333
// </any-tag>
328334
// The whole map represented by an `<any-tag>` element, the map key
329335
// is a `tag`, and the value is a `Start` event (the value deserializer
330336
// will see that event)
331-
ValueSource::Nested => seed.deserialize(&mut *self.de),
337+
ValueSource::Nested => seed.deserialize(MapValueDeserializer {
338+
map: self,
339+
allow_start: true,
340+
}),
332341
ValueSource::Unknown => Err(DeError::KeyNotRead),
333342
}
334343
}
@@ -364,6 +373,82 @@ where
364373
/// Access to the map that created this deserializer. Gives access to the
365374
/// context, such as list of fields, that current map known about.
366375
map: &'m mut MapAccess<'de, 'a, R>,
376+
/// Determines, should [`Deserializer::next_text_impl()`] expand the second
377+
/// level of tags or not.
378+
///
379+
/// If this field is `true`, we process the following XML shape:
380+
///
381+
/// ```xml
382+
/// <any-tag>
383+
/// <tag>...</tag>
384+
/// </any-tag>
385+
/// ```
386+
///
387+
/// The whole map represented by an `<any-tag>` element, the map key is a `tag`,
388+
/// and the value starts with is a `Start("tag")` (the value deserializer will
389+
/// see that event first) and extended to the matching `End("tag")` event.
390+
/// In order to deserialize primitives (such as `usize`) we need to allow to
391+
/// look inside the one levels of tags, so the
392+
///
393+
/// ```xml
394+
/// <tag>42<tag>
395+
/// ```
396+
///
397+
/// could be deserialized into `42usize` without problems, and at the same time
398+
///
399+
/// ```xml
400+
/// <tag>
401+
/// <key1/>
402+
/// <key2/>
403+
/// <!--...-->
404+
/// <tag>
405+
/// ```
406+
/// could be deserialized to a struct.
407+
///
408+
/// If this field is `false`, we processes the one of following XML shapes:
409+
///
410+
/// ```xml
411+
/// <any-tag>
412+
/// text value
413+
/// </any-tag>
414+
/// ```
415+
/// ```xml
416+
/// <any-tag>
417+
/// <![CDATA[cdata value]]>
418+
/// </any-tag>
419+
/// ```
420+
/// ```xml
421+
/// <any-tag>
422+
/// <any>...</any>
423+
/// </any-tag>
424+
/// ```
425+
///
426+
/// The whole map represented by an `<any-tag>` element, the map key is
427+
/// implicit and equals to the [`INNER_VALUE`] constant, and the value is
428+
/// a [`Text`], a [`CData`], or a [`Start`] event (the value deserializer
429+
/// will see one of those events). In the first two cases the value of this
430+
/// field do not matter (because we already see the textual event and there
431+
/// no reasons to look "inside" something), but in the last case the primitives
432+
/// should raise a deserialization error, because that means that you trying
433+
/// to deserialize the following struct:
434+
///
435+
/// ```ignore
436+
/// struct AnyName {
437+
/// #[serde(rename = "$value")]
438+
/// any_name: String,
439+
/// }
440+
/// ```
441+
/// which means that `any_name` should get a content of the `<any-tag>` element.
442+
///
443+
/// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
444+
/// but those fields should be explicitly marked that they want to get any
445+
/// possible markup as a `String` and that mark is different from marking them
446+
/// as accepting "text content" which the currently `$value` means.
447+
///
448+
/// [`Text`]: DeEvent::Text
449+
/// [`CData`]: DeEvent::CData
450+
/// [`Start`]: DeEvent::Start
451+
allow_start: bool,
367452
}
368453

369454
impl<'de, 'a, 'm, R> MapValueDeserializer<'de, 'a, 'm, R>
@@ -373,7 +458,7 @@ where
373458
/// Returns a text event, used inside [`deserialize_primitives!()`]
374459
#[inline]
375460
fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
376-
self.map.de.next_text_impl(unescape, false)
461+
self.map.de.next_text_impl(unescape, self.allow_start)
377462
}
378463

379464
/// Returns a decoder, used inside [`deserialize_primitives!()`]
@@ -396,10 +481,6 @@ where
396481
forward!(deserialize_unit_struct(name: &'static str));
397482
forward!(deserialize_newtype_struct(name: &'static str));
398483

399-
forward!(deserialize_seq);
400-
forward!(deserialize_tuple(len: usize));
401-
forward!(deserialize_tuple_struct(name: &'static str, len: usize));
402-
403484
forward!(deserialize_map);
404485
forward!(deserialize_struct(
405486
name: &'static str,
@@ -414,8 +495,101 @@ where
414495
forward!(deserialize_any);
415496
forward!(deserialize_ignored_any);
416497

498+
/// Tuple representation is the same as [sequences](#method.deserialize_seq).
499+
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
500+
where
501+
V: Visitor<'de>,
502+
{
503+
self.deserialize_seq(visitor)
504+
}
505+
506+
/// Named tuple representation is the same as [unnamed tuples](#method.deserialize_tuple).
507+
fn deserialize_tuple_struct<V>(
508+
self,
509+
_name: &'static str,
510+
len: usize,
511+
visitor: V,
512+
) -> Result<V::Value, DeError>
513+
where
514+
V: Visitor<'de>,
515+
{
516+
self.deserialize_tuple(len, visitor)
517+
}
518+
519+
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
520+
where
521+
V: Visitor<'de>,
522+
{
523+
let filter = if self.allow_start {
524+
match self.map.de.peek()? {
525+
// Clone is cheap if event borrows from the input
526+
DeEvent::Start(e) => TagFilter::Include(e.clone()),
527+
// SAFETY: we use that deserializer with `allow_start == true`
528+
// only from the `MapAccess::next_value_seed` and only when we
529+
// peeked `Start` event
530+
_ => unreachable!(),
531+
}
532+
} else {
533+
TagFilter::Exclude(self.map.fields)
534+
};
535+
let seq = visitor.visit_seq(MapValueSeqAccess {
536+
map: self.map,
537+
filter,
538+
});
539+
#[cfg(feature = "overlapped-lists")]
540+
self.map.de.start_replay();
541+
seq
542+
}
543+
417544
#[inline]
418545
fn is_human_readable(&self) -> bool {
419546
self.map.de.is_human_readable()
420547
}
421548
}
549+
550+
////////////////////////////////////////////////////////////////////////////////////////////////////
551+
552+
/// An accessor to sequence elements forming a value for struct field.
553+
/// Technically, this sequence is flattened out into structure and sequence
554+
/// elements are overlapped with other fields of a structure
555+
struct MapValueSeqAccess<'de, 'a, 'm, R>
556+
where
557+
R: XmlRead<'de>,
558+
{
559+
/// Accessor to a map that creates this accessor and to a deserializer for
560+
/// a sequence items.
561+
map: &'m mut MapAccess<'de, 'a, R>,
562+
/// Filter that determines whether a tag is a part of this sequence.
563+
///
564+
/// Iteration will stop when found a tag that does not pass this filter.
565+
filter: TagFilter<'de>,
566+
}
567+
568+
impl<'de, 'a, 'm, R> SeqAccess<'de> for MapValueSeqAccess<'de, 'a, 'm, R>
569+
where
570+
R: XmlRead<'de>,
571+
{
572+
type Error = DeError;
573+
574+
fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
575+
where
576+
T: DeserializeSeed<'de>,
577+
{
578+
let decoder = self.map.de.reader.decoder();
579+
match self.map.de.peek()? {
580+
// Stop iteration when list elements ends
581+
DeEvent::Start(e) if !self.filter.is_suitable(&e, decoder)? => Ok(None),
582+
583+
// Stop iteration after reaching a closing tag
584+
DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None),
585+
// This is a unmatched closing tag, so the XML is invalid
586+
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
587+
// We cannot get `Eof` legally, because we always inside of the
588+
// opened tag `self.map.start`
589+
DeEvent::Eof => Err(DeError::UnexpectedEof),
590+
591+
// Start(tag), Text, CData
592+
_ => seed.deserialize(&mut *self.map.de).map(Some),
593+
}
594+
}
595+
}

src/de/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ where
649649
where
650650
V: Visitor<'de>,
651651
{
652-
visitor.visit_seq(seq::SeqAccess::new(self)?)
652+
visitor.visit_seq(seq::TopLevelSeqAccess::new(self)?)
653653
}
654654

655655
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>

src/de/seq.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::de::{DeError, DeEvent, Deserializer, XmlRead};
22
use crate::events::BytesStart;
33
use crate::reader::Decoder;
4-
use serde::de::{self, DeserializeSeed};
4+
use serde::de::{DeserializeSeed, SeqAccess};
55
#[cfg(not(feature = "encoding"))]
66
use std::borrow::Cow;
77

@@ -43,7 +43,7 @@ pub fn not_in(
4343
/// `'de` represents a lifetime of the XML input, when filter stores the
4444
/// dedicated tag name
4545
#[derive(Debug)]
46-
enum TagFilter<'de> {
46+
pub enum TagFilter<'de> {
4747
/// A `SeqAccess` interested only in tags with specified name to deserialize
4848
/// an XML like this:
4949
///
@@ -65,7 +65,7 @@ enum TagFilter<'de> {
6565
}
6666

6767
impl<'de> TagFilter<'de> {
68-
fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
68+
pub fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
6969
match self {
7070
Self::Include(n) => Ok(n.name() == start.name()),
7171
Self::Exclude(fields) => not_in(fields, start, decoder),
@@ -74,21 +74,23 @@ impl<'de> TagFilter<'de> {
7474
}
7575

7676
/// A SeqAccess
77-
pub struct SeqAccess<'de, 'a, R>
77+
pub struct TopLevelSeqAccess<'de, 'a, R>
7878
where
7979
R: XmlRead<'de>,
8080
{
8181
/// Deserializer used to deserialize sequence items
8282
de: &'a mut Deserializer<'de, R>,
8383
/// Filter that determines whether a tag is a part of this sequence.
84+
///
85+
/// Iteration will stop when found a tag that does not pass this filter.
8486
filter: TagFilter<'de>,
8587
}
8688

87-
impl<'a, 'de, R> SeqAccess<'de, 'a, R>
89+
impl<'a, 'de, R> TopLevelSeqAccess<'de, 'a, R>
8890
where
8991
R: XmlRead<'de>,
9092
{
91-
/// Get a new SeqAccess
93+
/// Creates a new accessor to a top-level sequence of XML elements.
9294
pub fn new(de: &'a mut Deserializer<'de, R>) -> Result<Self, DeError> {
9395
let filter = if de.has_value_field {
9496
TagFilter::Exclude(&[])
@@ -104,7 +106,7 @@ where
104106
}
105107
}
106108

107-
impl<'de, 'a, R> de::SeqAccess<'de> for SeqAccess<'de, 'a, R>
109+
impl<'de, 'a, R> SeqAccess<'de> for TopLevelSeqAccess<'de, 'a, R>
108110
where
109111
R: XmlRead<'de>,
110112
{
@@ -116,8 +118,13 @@ where
116118
{
117119
let decoder = self.de.reader.decoder();
118120
match self.de.peek()? {
119-
DeEvent::Eof | DeEvent::End(_) => Ok(None),
121+
// Stop iteration when list elements ends
120122
DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
123+
// This is unmatched End tag at top-level
124+
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
125+
DeEvent::Eof => Ok(None),
126+
127+
// Start(tag), Text, CData
121128
_ => seed.deserialize(&mut *self.de).map(Some),
122129
}
123130
}

0 commit comments

Comments
 (0)