@@ -94,6 +94,66 @@ impl<'a> Serializer<'a> {
9494 Ok ( ( ) )
9595 }
9696 }
97+
98+ fn push_char ( & mut self , c : char ) -> Result < ( ) > {
99+ // Do escaping according to "6. MUST represent all strings (including object member names) in
100+ // their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/
101+ //
102+ // We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8,
103+ // even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates
104+ // cannot exist in a Rust String. If they do, the bug is in the String constructor.
105+ // An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w
106+
107+ // Temporary storage for encoded a single char.
108+ // A char is up to 4 bytes long wehn encoded to UTF-8.
109+ let mut encoding_tmp = [ 0u8 ; 4 ] ;
110+
111+ match c {
112+ '\\' => {
113+ self . push ( b'\\' ) ?;
114+ self . push ( b'\\' ) ?;
115+ }
116+ '"' => {
117+ self . push ( b'\\' ) ?;
118+ self . push ( b'"' ) ?;
119+ }
120+ '\u{0008}' => {
121+ self . push ( b'\\' ) ?;
122+ self . push ( b'b' ) ?;
123+ }
124+ '\u{0009}' => {
125+ self . push ( b'\\' ) ?;
126+ self . push ( b't' ) ?;
127+ }
128+ '\u{000A}' => {
129+ self . push ( b'\\' ) ?;
130+ self . push ( b'n' ) ?;
131+ }
132+ '\u{000C}' => {
133+ self . push ( b'\\' ) ?;
134+ self . push ( b'f' ) ?;
135+ }
136+ '\u{000D}' => {
137+ self . push ( b'\\' ) ?;
138+ self . push ( b'r' ) ?;
139+ }
140+ '\u{0000}' ..='\u{001F}' => {
141+ self . push ( b'\\' ) ?;
142+ self . push ( b'u' ) ?;
143+ self . push ( b'0' ) ?;
144+ self . push ( b'0' ) ?;
145+ let ( hex1, hex2) = hex ( c as u8 ) ;
146+ self . push ( hex1) ?;
147+ self . push ( hex2) ?;
148+ }
149+ _ => {
150+ let encoded = c. encode_utf8 ( & mut encoding_tmp as & mut [ u8 ] ) ;
151+ self . extend_from_slice ( encoded. as_bytes ( ) ) ?;
152+ }
153+ }
154+
155+ Ok ( ( ) )
156+ }
97157}
98158
99159// NOTE(serialize_*signed) This is basically the numtoa implementation minus the lookup tables,
@@ -263,62 +323,8 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> {
263323 fn serialize_str ( self , v : & str ) -> Result < Self :: Ok > {
264324 self . push ( b'"' ) ?;
265325
266- // Do escaping according to "6. MUST represent all strings (including object member names) in
267- // their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/
268- //
269- // We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8,
270- // even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates
271- // cannot exist in a Rust String. If they do, the bug is in the String constructor.
272- // An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w
273-
274- // Temporary storage for encoded a single char.
275- // A char is up to 4 bytes long wehn encoded to UTF-8.
276- let mut encoding_tmp = [ 0u8 ; 4 ] ;
277-
278326 for c in v. chars ( ) {
279- match c {
280- '\\' => {
281- self . push ( b'\\' ) ?;
282- self . push ( b'\\' ) ?;
283- }
284- '"' => {
285- self . push ( b'\\' ) ?;
286- self . push ( b'"' ) ?;
287- }
288- '\u{0008}' => {
289- self . push ( b'\\' ) ?;
290- self . push ( b'b' ) ?;
291- }
292- '\u{0009}' => {
293- self . push ( b'\\' ) ?;
294- self . push ( b't' ) ?;
295- }
296- '\u{000A}' => {
297- self . push ( b'\\' ) ?;
298- self . push ( b'n' ) ?;
299- }
300- '\u{000C}' => {
301- self . push ( b'\\' ) ?;
302- self . push ( b'f' ) ?;
303- }
304- '\u{000D}' => {
305- self . push ( b'\\' ) ?;
306- self . push ( b'r' ) ?;
307- }
308- '\u{0000}' ..='\u{001F}' => {
309- self . push ( b'\\' ) ?;
310- self . push ( b'u' ) ?;
311- self . push ( b'0' ) ?;
312- self . push ( b'0' ) ?;
313- let ( hex1, hex2) = hex ( c as u8 ) ;
314- self . push ( hex1) ?;
315- self . push ( hex2) ?;
316- }
317- _ => {
318- let encoded = c. encode_utf8 ( & mut encoding_tmp as & mut [ u8 ] ) ;
319- self . extend_from_slice ( encoded. as_bytes ( ) ) ?;
320- }
321- }
327+ self . push_char ( c) ?;
322328 }
323329
324330 self . push ( b'"' )
@@ -434,11 +440,40 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> {
434440 Ok ( SerializeStructVariant :: new ( self ) )
435441 }
436442
437- fn collect_str < T : ?Sized > ( self , _value : & T ) -> Result < Self :: Ok >
443+ fn collect_str < T : ?Sized > ( self , value : & T ) -> Result < Self :: Ok >
438444 where
439445 T : fmt:: Display ,
440446 {
441- unreachable ! ( )
447+ self . push ( b'"' ) ?;
448+
449+ let mut col = StringCollector :: new ( self ) ;
450+ fmt:: write ( & mut col, format_args ! ( "{}" , value) ) . or ( Err ( Error :: BufferFull ) ) ?;
451+
452+ self . push ( b'"' )
453+ }
454+ }
455+
456+ struct StringCollector < ' a , ' b > {
457+ ser : & ' a mut Serializer < ' b > ,
458+ }
459+
460+ impl < ' a , ' b > StringCollector < ' a , ' b > {
461+ pub fn new ( ser : & ' a mut Serializer < ' b > ) -> Self {
462+ Self { ser }
463+ }
464+
465+ fn do_write_str ( & mut self , s : & str ) -> Result < ( ) > {
466+ for c in s. chars ( ) {
467+ self . ser . push_char ( c) ?;
468+ }
469+
470+ Ok ( ( ) )
471+ }
472+ }
473+
474+ impl < ' a , ' b > fmt:: Write for StringCollector < ' a , ' b > {
475+ fn write_str ( & mut self , s : & str ) -> fmt:: Result {
476+ self . do_write_str ( s) . or ( Err ( fmt:: Error ) )
442477 }
443478}
444479
0 commit comments