Skip to content

Commit 8cfcbb5

Browse files
committed
Properly normalize EOL characters in BytesText::decode, BytesCData::decode and BytesRef::decode methods
1 parent 72647ef commit 8cfcbb5

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717

1818
### Bug Fixes
1919

20+
- [#379]: Properly normalize EOL characters in `BytesText::decode`, `BytesCData::decode`
21+
and `BytesRef::decode` methods
22+
2023
### Misc Changes
2124

2225

src/encoding.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ use std::str::Utf8Error;
66
#[cfg(feature = "encoding")]
77
use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};
88

9+
use crate::escape::normalize_eols;
10+
911
/// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8.
1012
/// See <https://unicode.org/faq/utf_bom.html#bom1>
1113
pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
@@ -145,9 +147,20 @@ impl Decoder {
145147
bytes: &Cow<'b, [u8]>,
146148
) -> Result<Cow<'b, str>, EncodingError> {
147149
match bytes {
148-
Cow::Borrowed(bytes) => self.decode(bytes),
150+
Cow::Borrowed(bytes) => {
151+
let text = self.decode(bytes)?;
152+
match normalize_eols(&text) {
153+
// If the text is still borrowed after normalization, it was not changed
154+
Cow::Borrowed(_) => Ok(text),
155+
Cow::Owned(s) => Ok(Cow::Owned(s)),
156+
}
157+
}
149158
// Convert to owned, because otherwise the Cow would be bound to the wrong lifetime
150-
Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
159+
Cow::Owned(bytes) => {
160+
let text = self.decode(bytes)?;
161+
let text = normalize_eols(&text);
162+
Ok(text.into_owned().into())
163+
}
151164
}
152165
}
153166
}

tests/serde-se.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,9 +1897,11 @@ mod with_root {
18971897
<root>3</root>");
18981898
serialize_as!(tuple:
18991899
// Use to_string() to get owned type that is required for deserialization
1900-
("<\"&'>".to_string(), "with\t\r\n spaces", 3usize)
1900+
// NOTE: do not use \r, because it is normalized to \n during deserialization
1901+
// but is written as is during serialization
1902+
("<\"&'>".to_string(), "with\t\n spaces", 3usize)
19011903
=> "<root>&lt;\"&amp;'&gt;</root>\
1902-
<root>with\t\r\n spaces</root>\
1904+
<root>with\t\n spaces</root>\
19031905
<root>3</root>");
19041906
serialize_as!(tuple_struct:
19051907
Tuple(42.0, "answer")

0 commit comments

Comments
 (0)