Skip to content

Commit abec80f

Browse files
committed
Fix #329: Borrow from input in unescape_* methods
Because those methods are usually used on events returned by the reader, which always borrow their content from the input / buffer, the actual allocation count does not change
1 parent 2bf2d2d commit abec80f

File tree

3 files changed

+30
-15
lines changed

3 files changed

+30
-15
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
|`*_with_custom_entities`|`*_with`
140140
|`BytesText::unescaped()`|`BytesText::unescape()`
141141
|`Attribute::unescaped_*`|`Attribute::unescape_*`
142+
- [#329]: Also, those functions now borrow from the input instead of the event / attribute
142143

143144
- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
144145
added to all events
@@ -199,6 +200,7 @@
199200
[#180]: https://github.com/tafia/quick-xml/issues/180
200201
[#191]: https://github.com/tafia/quick-xml/issues/191
201202
[#324]: https://github.com/tafia/quick-xml/issues/324
203+
[#329]: https://github.com/tafia/quick-xml/issues/329
202204
[#363]: https://github.com/tafia/quick-xml/issues/363
203205
[#387]: https://github.com/tafia/quick-xml/pull/387
204206
[#391]: https://github.com/tafia/quick-xml/pull/391

src/events/attributes.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ impl<'a> Attribute<'a> {
4141
///
4242
/// This method is available only if `encoding` feature is **not** enabled.
4343
#[cfg(any(doc, not(feature = "encoding")))]
44-
pub fn unescape_value(&self) -> XmlResult<Cow<str>> {
44+
pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
4545
self.unescape_value_with(|_| None)
4646
}
4747

@@ -61,19 +61,26 @@ impl<'a> Attribute<'a> {
6161
pub fn unescape_value_with<'entity>(
6262
&self,
6363
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
64-
) -> XmlResult<Cow<str>> {
64+
) -> XmlResult<Cow<'a, str>> {
6565
// from_utf8 should never fail because content is always UTF-8 encoded
66-
Ok(unescape_with(
67-
std::str::from_utf8(&self.value)?,
68-
resolve_entity,
69-
)?)
66+
let decoded = match &self.value {
67+
Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
68+
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
69+
Cow::Owned(bytes) => Cow::Owned(std::str::from_utf8(bytes)?.to_string()),
70+
};
71+
72+
match unescape_with(&decoded, resolve_entity)? {
73+
// Because the result is borrowed, no replacements were done and we can use the original string
74+
Cow::Borrowed(_) => Ok(decoded),
75+
Cow::Owned(s) => Ok(s.into()),
76+
}
7077
}
7178

7279
/// Decodes then unescapes the value.
7380
///
7481
/// This will allocate if the value contains any escape sequences or is in a
7582
/// non-UTF-8 encoding.
76-
pub fn decode_and_unescape_value<B>(&self, reader: &Reader<B>) -> XmlResult<Cow<str>> {
83+
pub fn decode_and_unescape_value<B>(&self, reader: &Reader<B>) -> XmlResult<Cow<'a, str>> {
7784
self.decode_and_unescape_value_with(reader, |_| None)
7885
}
7986

@@ -85,8 +92,12 @@ impl<'a> Attribute<'a> {
8592
&self,
8693
reader: &Reader<B>,
8794
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
88-
) -> XmlResult<Cow<str>> {
89-
let decoded = reader.decoder().decode(&*self.value)?;
95+
) -> XmlResult<Cow<'a, str>> {
96+
let decoded = match &self.value {
97+
Cow::Borrowed(bytes) => reader.decoder().decode(bytes)?,
98+
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
99+
Cow::Owned(bytes) => reader.decoder().decode(bytes)?.into_owned().into(),
100+
};
90101

91102
match unescape_with(&decoded, resolve_entity)? {
92103
// Because the result is borrowed, no replacements were done and we can use the original string

src/events/mod.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ impl<'a> BytesText<'a> {
732732
///
733733
/// This will allocate if the value contains any escape sequences or is in a
734734
/// non-UTF-8 encoding.
735-
pub fn unescape(&self) -> Result<Cow<str>> {
735+
pub fn unescape(&self) -> Result<Cow<'a, str>> {
736736
self.unescape_with(|_| None)
737737
}
738738

@@ -743,8 +743,12 @@ impl<'a> BytesText<'a> {
743743
pub fn unescape_with<'entity>(
744744
&self,
745745
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
746-
) -> Result<Cow<str>> {
747-
let decoded = self.decoder.decode(&*self)?;
746+
) -> Result<Cow<'a, str>> {
747+
let decoded = match &self.content {
748+
Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
749+
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
750+
Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
751+
};
748752

749753
match unescape_with(&decoded, resolve_entity)? {
750754
// Because the result is borrowed, no replacements were done and we can use the original string
@@ -754,11 +758,9 @@ impl<'a> BytesText<'a> {
754758
}
755759

756760
/// Gets content of this text buffer in the specified encoding and optionally
757-
/// unescapes it. Unlike [`Self::unescape`] & Co., the lifetime
758-
/// of the returned `Cow` is bound to the original buffer / input
761+
/// unescapes it.
759762
#[cfg(feature = "serialize")]
760763
pub(crate) fn decode(&self, unescape: bool) -> Result<Cow<'a, str>> {
761-
//TODO: too many copies, can be optimized
762764
let text = match &self.content {
763765
Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
764766
// Convert to owned, because otherwise Cow will be bound with wrong lifetime

0 commit comments

Comments
 (0)