Properly normalize EOL characters in BytesText::decode, BytesCData::decode and BytesRef::decode methods

Mingun · Mingun · commit 8cfcbb54b9fb · 2025-07-15T22:42:58.000+05:00
diff --git a/Changelog.md b/Changelog.md
@@ -17,6 +17,9 @@
 
 ### Bug Fixes
 
+- [#379]: Properly normalize EOL characters in `BytesText::decode`, `BytesCData::decode`
+  and `BytesRef::decode` methods
+
 ### Misc Changes
 
 
diff --git a/src/encoding.rs b/src/encoding.rs
@@ -6,6 +6,8 @@ use std::str::Utf8Error;
 #[cfg(feature = "encoding")]
 use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};
 
+use crate::escape::normalize_eols;
+
 /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8.
 /// See <https://unicode.org/faq/utf_bom.html#bom1>
 pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
@@ -145,9 +147,20 @@ impl Decoder {
         bytes: &Cow<'b, [u8]>,
     ) -> Result<Cow<'b, str>, EncodingError> {
         match bytes {
-            Cow::Borrowed(bytes) => self.decode(bytes),
+            Cow::Borrowed(bytes) => {
+                let text = self.decode(bytes)?;
+                match normalize_eols(&text) {
+                    // If the text is still borrowed after normalization, it was not changed
+                    Cow::Borrowed(_) => Ok(text),
+                    Cow::Owned(s) => Ok(Cow::Owned(s)),
+                }
+            }
             // Convert to owned, because otherwise Cow will be bound with wrong lifetime
-            Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
+            Cow::Owned(bytes) => {
+                let text = self.decode(bytes)?;
+                let text = normalize_eols(&text);
+                Ok(text.into_owned().into())
+            }
         }
     }
 }
diff --git a/tests/serde-se.rs b/tests/serde-se.rs
@@ -1897,9 +1897,11 @@ mod with_root {
             <root>3</root>");
     serialize_as!(tuple:
         // Use to_string() to get owned type that is required for deserialization
-        ("<\"&'>".to_string(), "with\t\r\n spaces", 3usize)
+        // NOTE: do not use \r, because it is normalized to \n during deserialization
+        // but is written as is during serialization
+        ("<\"&'>".to_string(), "with\t\n spaces", 3usize)
         => "<root>&lt;\"&amp;'&gt;</root>\
-            <root>with\t\r\n spaces</root>\
+            <root>with\t\n spaces</root>\
             <root>3</root>");
     serialize_as!(tuple_struct:
         Tuple(42.0, "answer")