Skip to content

Commit 55f541b

Browse files
Mingundralley
andcommitted
seq: Allow to have an ordinary elements together with a $value field
Example of a struct, that should be a valid definition: enum Choice { one, two, three } struct Root { #[serde(rename = "$value")] item: [Choice; 3], node: (), } and this struct should be able to deserialized from <root> <one/> <two/> <three/> <node/> </root> The following tests were fixed (10): seq::variable_name::fixed_size::field_after_list::after seq::variable_name::fixed_size::field_after_list::before seq::variable_name::fixed_size::field_before_list::after seq::variable_name::fixed_size::field_before_list::before seq::variable_name::fixed_size::two_lists::choice_and_fixed::fixed_after seq::variable_name::fixed_size::two_lists::choice_and_fixed::fixed_before seq::variable_name::fixed_size::two_lists::fixed_and_choice::fixed_after seq::variable_name::fixed_size::two_lists::fixed_and_choice::fixed_before seq::variable_name::variable_size::field_after_list::before seq::variable_name::variable_size::field_before_list::before failures (26): seq::fixed_name::fixed_size::field_after_list::overlapped seq::fixed_name::fixed_size::field_before_list::overlapped seq::fixed_name::fixed_size::two_lists::overlapped seq::fixed_name::fixed_size::unknown_items::overlapped seq::fixed_name::variable_size::field_after_list::overlapped seq::fixed_name::variable_size::field_before_list::overlapped seq::fixed_name::variable_size::two_lists::overlapped seq::fixed_name::variable_size::unknown_items::overlapped seq::variable_name::fixed_size::field_after_list::overlapped seq::variable_name::fixed_size::field_before_list::overlapped seq::variable_name::fixed_size::two_lists::choice_and_fixed::overlapped_fixed_after seq::variable_name::fixed_size::two_lists::choice_and_fixed::overlapped_fixed_before seq::variable_name::fixed_size::two_lists::fixed_and_choice::overlapped_fixed_after seq::variable_name::fixed_size::two_lists::fixed_and_choice::overlapped_fixed_before seq::variable_name::variable_size::field_after_list::after seq::variable_name::variable_size::field_after_list::overlapped seq::variable_name::variable_size::field_before_list::after seq::variable_name::variable_size::field_before_list::overlapped seq::variable_name::variable_size::two_lists::choice_and_fixed::fixed_after seq::variable_name::variable_size::two_lists::choice_and_fixed::fixed_before seq::variable_name::variable_size::two_lists::choice_and_fixed::overlapped_fixed_after seq::variable_name::variable_size::two_lists::choice_and_fixed::overlapped_fixed_before seq::variable_name::variable_size::two_lists::fixed_and_choice::fixed_after seq::variable_name::variable_size::two_lists::fixed_and_choice::fixed_before seq::variable_name::variable_size::two_lists::fixed_and_choice::overlapped_fixed_after seq::variable_name::variable_size::two_lists::fixed_and_choice::overlapped_fixed_before Co-authored-by: Daniel Alley <dalley@redhat.com>
1 parent d2ad730 commit 55f541b

File tree

4 files changed

+140
-22
lines changed

4 files changed

+140
-22
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
- [#9]: Deserialization erroneously was successful in some cases where error is expected.
1616
This broke deserialization of untagged enums which rely on error if variant cannot be parsed
17+
- [#387]: Allow to have an ordinary elements together with a `$value` field
1718

1819
### Misc Changes
1920

src/de/map.rs

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use crate::{
44
de::escape::EscapedDeserializer,
5+
de::seq::not_in,
56
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
67
errors::serialize::DeError,
78
events::attributes::IterState,
@@ -43,12 +44,14 @@ enum ValueSource {
4344
/// </any-tag>
4445
/// ```
4546
Text,
46-
/// Next value should be deserialized from an element with an any name.
47-
/// Corresponding tag name will always be associated with a field with name
48-
/// [`INNER_VALUE`].
47+
/// Next value should be deserialized from an element with an any name, except
48+
/// elements with a name matching one of the struct fields. Corresponding tag
49+
/// name will always be associated with a field with name [`INNER_VALUE`].
4950
///
50-
/// That state is set when call to [`peek()`] returns a [`Start`] event
51-
/// _and_ struct has a field with a special name [`INNER_VALUE`].
51+
/// That state is set when call to [`peek()`] returns a [`Start`] event, which
52+
/// [`name()`] is not listed in the [list of known fields] (which for a struct
53+
/// is a list of field names, and for a map that is an empty list), _and_
54+
/// struct has a field with a special name [`INNER_VALUE`].
5255
///
5356
/// When in this state, next event, returned by [`next()`], will be a [`Start`],
5457
/// which represents both a key, and a value. Value would be deserialized from
@@ -97,7 +100,9 @@ enum ValueSource {
97100
/// [`Start`]: DeEvent::Start
98101
/// [`peek()`]: Deserializer::peek()
99102
/// [`next()`]: Deserializer::next()
103+
/// [`name()`]: BytesStart::name()
100104
/// [`Text`]: Self::Text
105+
/// [list of known fields]: MapAccess::fields
101106
Content,
102107
/// Next value should be deserialized from an element with a dedicated name.
103108
///
@@ -135,7 +140,24 @@ enum ValueSource {
135140
Nested,
136141
}
137142

138-
/// A deserializer for `Attributes`
143+
/// A deserializer that extracts map-like structures from an XML. This deserializer
144+
/// represents a one XML tag:
145+
///
146+
/// ```xml
147+
/// <tag>...</tag>
148+
/// ```
149+
///
150+
/// Name of this tag is stored in a [`Self::start`] property.
151+
///
152+
/// # Lifetimes
153+
///
154+
/// - `'de` lifetime represents a buffer, from which deserialized values can
155+
/// borrow their data. Depending on the underlying reader, there can be an
156+
/// internal buffer of deserializer (i.e. deserializer itself) or an input
157+
/// (in that case it is possible to approach zero-copy deserialization).
158+
///
159+
/// - `'a` lifetime represents a parent deserializer, which could own the data
160+
/// buffer.
139161
pub(crate) struct MapAccess<'de, 'a, R>
140162
where
141163
R: XmlRead<'de>,
@@ -149,6 +171,8 @@ where
149171
/// Current state of the accessor that determines what next call to API
150172
/// methods should return.
151173
source: ValueSource,
174+
/// List of field names of the struct. It is empty for maps
175+
fields: &'static [&'static str],
152176
/// list of fields yet to unflatten (defined as starting with $unflatten=)
153177
unflatten_fields: Vec<&'static [u8]>,
154178
}
@@ -161,13 +185,14 @@ where
161185
pub fn new(
162186
de: &'a mut Deserializer<'de, R>,
163187
start: BytesStart<'de>,
164-
fields: &[&'static str],
188+
fields: &'static [&'static str],
165189
) -> Result<Self, DeError> {
166190
Ok(MapAccess {
167191
de,
168192
start,
169193
iter: IterState::new(0, false),
170194
source: ValueSource::Unknown,
195+
fields,
171196
unflatten_fields: fields
172197
.iter()
173198
.filter(|f| f.starts_with(UNFLATTEN_PREFIX))
@@ -230,7 +255,7 @@ where
230255
// }
231256
// TODO: This should be handled by #[serde(flatten)]
232257
// See https://github.com/serde-rs/serde/issues/1905
233-
DeEvent::Start(_) if has_value_field => {
258+
DeEvent::Start(e) if has_value_field && not_in(self.fields, e, decoder)? => {
234259
self.source = ValueSource::Content;
235260
seed.deserialize(INNER_VALUE.into_deserializer()).map(Some)
236261
}

src/de/seq.rs

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,74 @@
11
use crate::de::{DeError, DeEvent, Deserializer, XmlRead};
22
use crate::events::BytesStart;
3+
use crate::reader::Decoder;
34
use serde::de::{self, DeserializeSeed};
5+
#[cfg(not(feature = "encoding"))]
6+
use std::borrow::Cow;
47

8+
/// Check if tag `start` is included in the `fields` list. `decoder` is used to
9+
/// get a string representation of a tag.
10+
///
11+
/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
12+
pub fn not_in(
13+
fields: &'static [&'static str],
14+
start: &BytesStart,
15+
decoder: Decoder,
16+
) -> Result<bool, DeError> {
17+
#[cfg(not(feature = "encoding"))]
18+
let tag = Cow::Borrowed(decoder.decode(start.name())?);
19+
20+
#[cfg(feature = "encoding")]
21+
let tag = decoder.decode(start.name());
22+
23+
Ok(fields.iter().all(|&field| field != tag.as_ref()))
24+
}
25+
26+
/// A filter that determines, what tags should form a sequence.
27+
///
28+
/// There are two types of sequences:
29+
/// - sequence where each element represented by tags with the same name
30+
/// - sequence where each element can have a different tag
31+
///
32+
/// The first variant could represent a collection of structs, the second --
33+
/// a collection of enum variants.
34+
///
35+
/// In the second case we don't know what tag name should be expected as a
36+
/// sequence element, so we accept any element. Since the sequence are flattened
37+
/// into maps, we skip elements which have dedicated fields in a struct by using an
38+
/// `Exclude` filter that filters out elements with names matching field names
39+
/// from the struct.
40+
///
41+
/// # Lifetimes
42+
///
43+
/// `'de` represents a lifetime of the XML input, when filter stores the
44+
/// dedicated tag name
545
#[derive(Debug)]
6-
enum Names {
7-
Unknown,
8-
Peek(Vec<u8>),
46+
enum TagFilter<'de> {
47+
/// A `SeqAccess` interested only in tags with specified name to deserialize
48+
/// an XML like this:
49+
///
50+
/// ```xml
51+
/// <...>
52+
/// <tag/>
53+
/// <tag/>
54+
/// <tag/>
55+
/// ...
56+
/// </...>
57+
/// ```
58+
///
59+
/// The tag name is stored inside (`b"tag"` for that example)
60+
Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
61+
/// A `SeqAccess` interested in tags with any name, except explicitly listed.
62+
/// Excluded tags are used as struct field names and therefore should not
63+
/// fall into a `$value` category
64+
Exclude(&'static [&'static str]),
965
}
1066

11-
impl Names {
12-
fn is_valid(&self, start: &BytesStart) -> bool {
67+
impl<'de> TagFilter<'de> {
68+
fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
1369
match self {
14-
Names::Unknown => true,
15-
Names::Peek(n) => n == start.name(),
70+
Self::Include(n) => Ok(n.name() == start.name()),
71+
Self::Exclude(fields) => not_in(fields, start, decoder),
1672
}
1773
}
1874
}
@@ -22,8 +78,10 @@ pub struct SeqAccess<'de, 'a, R>
2278
where
2379
R: XmlRead<'de>,
2480
{
81+
/// Deserializer used to deserialize sequence items
2582
de: &'a mut Deserializer<'de, R>,
26-
names: Names,
83+
/// Filter that determines whether a tag is a part of this sequence.
84+
filter: TagFilter<'de>,
2785
}
2886

2987
impl<'a, 'de, R> SeqAccess<'de, 'a, R>
@@ -32,16 +90,17 @@ where
3290
{
3391
/// Get a new SeqAccess
3492
pub fn new(de: &'a mut Deserializer<'de, R>) -> Result<Self, DeError> {
35-
let names = if de.has_value_field {
36-
Names::Unknown
93+
let filter = if de.has_value_field {
94+
TagFilter::Exclude(&[])
3795
} else {
3896
if let DeEvent::Start(e) = de.peek()? {
39-
Names::Peek(e.name().to_vec())
97+
// Clone is cheap if event borrows from the input
98+
TagFilter::Include(e.clone())
4099
} else {
41-
Names::Unknown
100+
TagFilter::Exclude(&[])
42101
}
43102
};
44-
Ok(SeqAccess { de, names })
103+
Ok(Self { de, filter })
45104
}
46105
}
47106

@@ -55,10 +114,26 @@ where
55114
where
56115
T: DeserializeSeed<'de>,
57116
{
117+
let decoder = self.de.reader.decoder();
58118
match self.de.peek()? {
59119
DeEvent::Eof | DeEvent::End(_) => Ok(None),
60-
DeEvent::Start(e) if !self.names.is_valid(e) => Ok(None),
120+
DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
61121
_ => seed.deserialize(&mut *self.de).map(Some),
62122
}
63123
}
64124
}
125+
126+
#[test]
127+
fn test_not_in() {
128+
let tag = BytesStart::borrowed_name(b"tag");
129+
130+
assert_eq!(not_in(&[], &tag, Decoder::utf8()).unwrap(), true);
131+
assert_eq!(
132+
not_in(&["no", "such", "tags"], &tag, Decoder::utf8()).unwrap(),
133+
true
134+
);
135+
assert_eq!(
136+
not_in(&["some", "tag", "included"], &tag, Decoder::utf8()).unwrap(),
137+
false
138+
);
139+
}

src/reader.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,6 +1669,23 @@ impl Decoder {
16691669
}
16701670
}
16711671

1672+
/// This implementation is required for tests of other parts of the library
1673+
#[cfg(test)]
1674+
#[cfg(feature = "serialize")]
1675+
impl Decoder {
1676+
#[cfg(not(feature = "encoding"))]
1677+
pub(crate) fn utf8() -> Self {
1678+
Decoder
1679+
}
1680+
1681+
#[cfg(feature = "encoding")]
1682+
pub(crate) fn utf8() -> Self {
1683+
Decoder {
1684+
encoding: encoding_rs::UTF_8,
1685+
}
1686+
}
1687+
}
1688+
16721689
#[cfg(test)]
16731690
mod test {
16741691
macro_rules! check {

0 commit comments

Comments
 (0)