@@ -134,6 +134,20 @@ macro_rules! serialize_fmt {
134134 } } ;
135135}
136136
/// Returns the upper-case hexadecimal ASCII digit for a 4-bit value.
///
/// `c` is expected to be in `0..16`. Only the low four bits are consulted, so
/// a larger value can neither overflow nor produce a byte that is not a hex
/// digit; for in-range input the result is `b'0'..=b'9'` or `b'A'..=b'F'`.
fn hex_4bit(c: u8) -> u8 {
    const UPPER_HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Masking keeps the index inside the 16-entry table for every possible `u8`.
    UPPER_HEX_DIGITS[usize::from(c & 0x0F)]
}
145+
146+ /// Upper-case hex for value in 0..256, encoded as ASCII bytes
147+ fn hex ( c : u8 ) -> ( u8 , u8 ) {
148+ ( hex_4bit ( c >> 4 ) , hex_4bit ( c & 0x0F ) )
149+ }
150+
137151impl < ' a , B > ser:: Serializer for & ' a mut Serializer < B >
138152where
139153 B : heapless:: ArrayLength < u8 > ,
@@ -212,7 +226,66 @@ where
212226
213227 fn serialize_str ( self , v : & str ) -> Result < Self :: Ok > {
214228 self . buf . push ( b'"' ) ?;
215- self . buf . extend_from_slice ( v. as_bytes ( ) ) ?;
229+
230+
231+ // Do escaping according to "6. MUST represent all strings (including object member names) in
232+ // their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/
233+ //
234+ // We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8,
235+ // even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates
236+ // cannot exist in a Rust String. If they do, the bug is in the String constructor.
237+ // An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w
238+
239+ // Temporary storage for encoded a single char.
240+ // A char is up to 4 bytes long wehn encoded to UTF-8.
241+ let mut encoding_tmp = [ 0u8 ; 4 ] ;
242+
243+ for c in v. chars ( ) {
244+ match c {
245+ '\\' => {
246+ self . buf . push ( b'\\' ) ?;
247+ self . buf . push ( b'\\' ) ?;
248+ }
249+ '"' => {
250+ self . buf . push ( b'\\' ) ?;
251+ self . buf . push ( b'"' ) ?;
252+ }
253+ '\u{0008}' => {
254+ self . buf . push ( b'\\' ) ?;
255+ self . buf . push ( b'b' ) ?;
256+ }
257+ '\u{0009}' => {
258+ self . buf . push ( b'\\' ) ?;
259+ self . buf . push ( b't' ) ?;
260+ }
261+ '\u{000A}' => {
262+ self . buf . push ( b'\\' ) ?;
263+ self . buf . push ( b'n' ) ?;
264+ }
265+ '\u{000C}' => {
266+ self . buf . push ( b'\\' ) ?;
267+ self . buf . push ( b'f' ) ?;
268+ }
269+ '\u{000D}' => {
270+ self . buf . push ( b'\\' ) ?;
271+ self . buf . push ( b'r' ) ?;
272+ }
273+ '\u{0000}' ..='\u{001F}' => {
274+ self . buf . push ( b'\\' ) ?;
275+ self . buf . push ( b'u' ) ?;
276+ self . buf . push ( b'0' ) ?;
277+ self . buf . push ( b'0' ) ?;
278+ let ( hex1, hex2) = hex ( c as u8 ) ;
279+ self . buf . push ( hex1) ?;
280+ self . buf . push ( hex2) ?;
281+ }
282+ _ => {
283+ let encoded = c. encode_utf8 ( & mut encoding_tmp as & mut [ u8 ] ) ;
284+ self . buf . extend_from_slice ( encoded. as_bytes ( ) ) ?;
285+ }
286+ }
287+ }
288+
216289 self . buf . push ( b'"' ) ?;
217290 Ok ( ( ) )
218291 }
@@ -472,6 +545,33 @@ mod tests {
/// Checks string serialization: pass-through of ordinary characters and each
/// class of escape sequence.
#[test]
fn str() {
    assert_eq!(&*crate::to_string::<N, _>("hello").unwrap(), r#""hello""#);
    assert_eq!(&*crate::to_string::<N, _>("").unwrap(), r#""""#);

    // Characters are left unescaped whenever possible
    assert_eq!(&*crate::to_string::<N, _>("ä").unwrap(), r#""ä""#);
    assert_eq!(&*crate::to_string::<N, _>("৬").unwrap(), r#""৬""#);
    // Non-breaking space; written as an escape on both sides so the invisible
    // character cannot be silently altered by an editor.
    assert_eq!(&*crate::to_string::<N, _>("\u{A0}").unwrap(), "\"\u{A0}\"");
    assert_eq!(&*crate::to_string::<N, _>("ℝ").unwrap(), r#""ℝ""#); // 3 byte character
    assert_eq!(&*crate::to_string::<N, _>("💣").unwrap(), r#""💣""#); // 4 byte character

    // " and \ must be escaped
    assert_eq!(&*crate::to_string::<N, _>("foo\"bar").unwrap(), r#""foo\"bar""#);
    assert_eq!(&*crate::to_string::<N, _>("foo\\bar").unwrap(), r#""foo\\bar""#);

    // \b, \t, \n, \f, \r must be escaped in their two-character escaping
    assert_eq!(&*crate::to_string::<N, _>(" \u{0008} ").unwrap(), r#"" \b ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{0009} ").unwrap(), r#"" \t ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{000A} ").unwrap(), r#"" \n ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{000C} ").unwrap(), r#"" \f ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{000D} ").unwrap(), r#"" \r ""#);

    // U+0000 through U+001F is escaped using six-character \u00xx uppercase hexadecimal escape sequences
    assert_eq!(&*crate::to_string::<N, _>(" \u{0000} ").unwrap(), r#"" \u0000 ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{0001} ").unwrap(), r#"" \u0001 ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{0007} ").unwrap(), r#"" \u0007 ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{000e} ").unwrap(), r#"" \u000E ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{001D} ").unwrap(), r#"" \u001D ""#);
    assert_eq!(&*crate::to_string::<N, _>(" \u{001f} ").unwrap(), r#"" \u001F ""#);
}
476576
477577 #[ test]
0 commit comments