@@ -95,6 +95,14 @@ pub enum Error {
9595
9696impl serde:: de:: StdError for Error { }
9797
/// Converts a `crate::str::StringUnescapeError` into this module's `Error`, so that `?` can be
/// applied to unescaping results inside functions that return this module's `Result`.
98+ impl From < crate :: str:: StringUnescapeError > for Error {
99+ fn from ( error : crate :: str:: StringUnescapeError ) -> Self {
// The variant is matched by name with no wildcard arm, so the mapping is spelled out
// explicitly: `InvalidEscapeSequence` becomes `Error::InvalidEscapeSequence`.
100+ match error {
101+ crate :: str:: StringUnescapeError :: InvalidEscapeSequence => Self :: InvalidEscapeSequence ,
102+ }
103+ }
104+ }
105+
98106/// A structure that deserializes Rust values from JSON in a buffer.
99107pub struct Deserializer < ' b , ' s > {
100108 slice : & ' b [ u8 ] ,
@@ -485,89 +493,43 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> {
485493 where
486494 V : Visitor < ' de > ,
487495 {
488- let s = self . parse_str ( ) ?;
496+ let escaped_string = self . parse_str ( ) ?;
489497
490498 if let Some ( string_unescape_buffer) = self . string_unescape_buffer . as_deref_mut ( ) {
491- if s . as_bytes ( ) . contains ( & b'\\' ) {
492- let mut string_unescape_buffer_slots = string_unescape_buffer . iter_mut ( ) ;
499+ if escaped_string . as_bytes ( ) . contains ( & b'\\' ) {
500+ let mut string_unescape_buffer_write_position = 0 ;
493501
494- // We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence
495- let mut escaped_string_bytes = s . as_bytes ( ) . iter ( ) ;
502+ for fragment in crate :: str :: unescape_fragments ( escaped_string ) {
503+ let char_encode_buffer = & mut [ 0 ; 4 ] ;
496504
497- loop {
498- match escaped_string_bytes. next ( ) . copied ( ) {
499- None => break ,
500- Some ( b'\\' ) => {
501- let unescaped_byte = match escaped_string_bytes. next ( ) {
502- Some ( b'"' ) => b'"' ,
503- Some ( b'\\' ) => b'\\' ,
504- Some ( b'/' ) => b'/' ,
505- Some ( b'b' ) => 0x8 ,
506- Some ( b'f' ) => 0xC ,
507- Some ( b'n' ) => b'\n' ,
508- Some ( b'r' ) => b'\r' ,
509- Some ( b't' ) => b'\t' ,
510- Some ( b'u' ) => {
511- // TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77
512- fn split_first_slice (
513- bytes : & [ u8 ] ,
514- len : usize ,
515- ) -> Option < ( & [ u8 ] , & [ u8 ] ) >
516- {
517- Some ( ( bytes. get ( ..len) ?, bytes. get ( len..) ?) )
518- }
519-
520- let ( escape_sequence, remaining_escaped_string_bytes) =
521- split_first_slice ( escaped_string_bytes. as_slice ( ) , 4 )
522- . ok_or ( Error :: InvalidEscapeSequence ) ?;
523-
524- escaped_string_bytes = remaining_escaped_string_bytes. iter ( ) ;
525-
526- let unescaped_char = core:: str:: from_utf8 ( escape_sequence)
527- . ok ( )
528- . and_then ( |escape_sequence| {
529- u32:: from_str_radix ( escape_sequence, 16 ) . ok ( )
530- } )
531- . and_then ( char:: from_u32)
532- . ok_or ( Error :: InvalidEscapeSequence ) ?;
533-
534- for & unescaped_byte in
535- unescaped_char. encode_utf8 ( & mut [ 0 ; 4 ] ) . as_bytes ( )
536- {
537- * string_unescape_buffer_slots
538- . next ( )
539- . ok_or ( Error :: EscapedStringIsTooLong ) ? = unescaped_byte;
540- }
541-
542- continue ;
543- }
544- _ => return Err ( Error :: InvalidEscapeSequence ) ,
545- } ;
546-
547- * string_unescape_buffer_slots
548- . next ( )
549- . ok_or ( Error :: EscapedStringIsTooLong ) ? = unescaped_byte;
505+ let unescaped_bytes = match fragment? {
506+ crate :: str:: EscapedStringFragment :: NotEscaped ( fragment) => {
507+ fragment. as_bytes ( )
550508 }
551- Some ( c) => {
552- * string_unescape_buffer_slots
553- . next ( )
554- . ok_or ( Error :: EscapedStringIsTooLong ) ? = c;
509+ crate :: str:: EscapedStringFragment :: Escaped ( c) => {
510+ c. encode_utf8 ( char_encode_buffer) . as_bytes ( )
555511 }
556- }
557- }
512+ } ;
513+
514+ string_unescape_buffer[ string_unescape_buffer_write_position..]
515+ . get_mut ( ..unescaped_bytes. len ( ) )
516+ . ok_or ( Error :: EscapedStringIsTooLong ) ?
517+ . copy_from_slice ( unescaped_bytes) ;
558518
559- let remaining_length = string_unescape_buffer_slots . len ( ) ;
560- let unescaped_string_length = string_unescape_buffer . len ( ) - remaining_length ;
519+ string_unescape_buffer_write_position += unescaped_bytes . len ( ) ;
520+ }
561521
562522 visitor. visit_str (
563- str:: from_utf8 ( & string_unescape_buffer[ ..unescaped_string_length] )
564- . map_err ( |_| Error :: InvalidUnicodeCodePoint ) ?,
523+ str:: from_utf8 (
524+ & string_unescape_buffer[ ..string_unescape_buffer_write_position] ,
525+ )
526+ . map_err ( |_| Error :: InvalidUnicodeCodePoint ) ?,
565527 )
566528 } else {
567- visitor. visit_borrowed_str ( s )
529+ visitor. visit_borrowed_str ( escaped_string )
568530 }
569531 } else {
570- visitor. visit_borrowed_str ( s )
532+ visitor. visit_borrowed_str ( escaped_string )
571533 }
572534 }
573535
@@ -638,11 +600,34 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> {
638600 }
639601
640602 /// Deserializes a newtype struct by handing a deserializer to `visitor.visit_newtype_struct`.
///
/// In the added (`+`) lines, a `name` equal to `crate::str::EscapedStr::NAME` is special-cased:
/// the visitor receives an `EscapedStringDeserializer` wrapper whose `deserialize_any` passes the
/// result of `parse_str()` to `visit_borrowed_str`. Any other name passes `self` through
/// unchanged, which is what the removed (`-`) line did unconditionally.
///
/// NOTE(review): `parse_str()` presumably yields the string still in its escaped form (the
/// `escaped_str` test expects the backslash sequence to survive) - confirm against `parse_str`.
641- fn deserialize_newtype_struct < V > ( self , _name : & ' static str , visitor : V ) -> Result < V :: Value >
603+ fn deserialize_newtype_struct < V > ( self , name : & ' static str , visitor : V ) -> Result < V :: Value >
642604 where
643605 V : Visitor < ' de > ,
644606 {
645- visitor. visit_newtype_struct ( self )
607+ if name == crate :: str:: EscapedStr :: NAME {
// Local wrapper around the parent deserializer; it exists only for this branch.
608+ struct EscapedStringDeserializer < ' a , ' de , ' s > ( & ' a mut Deserializer < ' de , ' s > ) ;
609+
610+ impl < ' a , ' de , ' s > serde:: Deserializer < ' de > for EscapedStringDeserializer < ' a , ' de , ' s > {
611+ type Error = Error ;
612+
613+ fn deserialize_any < V > ( self , visitor : V ) -> Result < V :: Value >
614+ where
615+ V : Visitor < ' de > ,
616+ {
// Hands the parsed slice to the visitor as a borrowed `str`; nothing is copied here.
617+ visitor. visit_borrowed_str ( self . 0 . parse_str ( ) ?)
618+ }
619+
// Every other `deserialize_*` entry point is routed to `deserialize_any` above.
620+ serde:: forward_to_deserialize_any! {
621+ bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
622+ bytes byte_buf option unit unit_struct newtype_struct seq tuple
623+ tuple_struct map struct enum identifier ignored_any
624+ }
625+ }
626+
627+ visitor. visit_newtype_struct ( EscapedStringDeserializer ( self ) )
628+ } else {
629+ visitor. visit_newtype_struct ( self )
630+ }
646631 }
647632
648633 fn deserialize_seq < V > ( self , visitor : V ) -> Result < V :: Value >
@@ -1058,6 +1043,14 @@ mod tests {
10581043 ) ;
10591044 }
10601045
// Checks that deserializing a JSON string into `crate::str::EscapedStr` yields a value equal to
// `EscapedStr::new` applied to the same text without the surrounding quotes.
1046+ #[ test]
1047+ fn escaped_str ( ) {
1048+ assert_eq ! (
// Raw string literals: `\n` below is a backslash followed by `n`, not a newline character.
1049+ crate :: from_str( r#""Hello\nWorld""# ) ,
// 14 equals the byte length of the input literal including both quotes - presumably the
// number of bytes consumed by `from_str`; TODO confirm against its signature.
1050+ Ok ( ( crate :: str :: EscapedStr :: new( r#"Hello\nWorld"# ) . unwrap( ) , 14 ) )
1051+ ) ;
1052+ }
1053+
10611054 #[ test]
10621055 fn struct_bool ( ) {
10631056 #[ derive( Debug , Deserialize , PartialEq ) ]
0 commit comments