1+
2+ pub use crate :: error:: { Error , ErrorType } ;
3+ pub use crate :: Deserializer ;
4+ pub use crate :: Result ;
5+ pub use crate :: neon:: stage1:: * ;
6+ pub use crate :: neon:: utf8check:: * ;
7+ pub use crate :: neon:: intrinsics:: * ;
8+ pub use crate :: stringparse:: * ;
9+
10+ impl < ' de > Deserializer < ' de > {
11+ #[ cfg_attr( not( feature = "no-inline" ) , inline( always) ) ]
12+ pub fn parse_str_ ( & mut self ) -> Result < & ' de str > {
13+ // Add 1 to skip the initial "
14+ let idx = self . iidx + 1 ;
15+ let mut padding = [ 0u8 ; 32 ] ;
16+ //let mut read: usize = 0;
17+
18+ // we include the terminal '"' so we know where to end
19+ // This is safe since we check sub's lenght in the range access above and only
20+ // create sub sliced form sub to `sub.len()`.
21+
22+ let src: & [ u8 ] = unsafe { & self . input . get_unchecked ( idx..) } ;
23+ let mut src_i: usize = 0 ;
24+ let mut len = src_i;
25+ loop {
26+ // store to dest unconditionally - we can overwrite the bits we don't like
27+ // later
28+
29+ let ( v0, v1) = if src. len ( ) >= src_i + 32 {
30+ // This is safe since we ensure src is at least 16 wide
31+ #[ allow( clippy:: cast_ptr_alignment) ]
32+ unsafe {
33+ (
34+ vld1q_u8 ( src. get_unchecked ( src_i..src_i + 16 ) . as_ptr ( ) ) ,
35+ vld1q_u8 ( src. get_unchecked ( src_i + 16 ..src_i + 32 ) . as_ptr ( ) ) ,
36+ )
37+ }
38+ } else {
39+ unsafe {
40+ padding
41+ . get_unchecked_mut ( ..src. len ( ) - src_i)
42+ . clone_from_slice ( src. get_unchecked ( src_i..) ) ;
43+ // This is safe since we ensure src is at least 32 wide
44+ (
45+ vld1q_u8 ( padding. get_unchecked ( 0 ..16 ) . as_ptr ( ) ) ,
46+ vld1q_u8 ( padding. get_unchecked ( 16 ..32 ) . as_ptr ( ) ) ,
47+ )
48+ }
49+ } ;
50+
51+ let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits ( v0, v1) ;
52+
53+ if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
54+ // we encountered quotes first. Move dst to point to quotes and exit
55+ // find out where the quote is...
56+ let quote_dist: u32 = quote_bits. trailing_zeros ( ) ;
57+
58+ ///////////////////////
59+ // Above, check for overflow in case someone has a crazy string (>=4GB?)
60+ // But only add the overflow check when the document itself exceeds 4GB
61+ // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
62+ ////////////////////////
63+
64+ // we advance the point, accounting for the fact that we have a NULl termination
65+
66+ len += quote_dist as usize ;
67+ unsafe {
68+ let v = self . input . get_unchecked ( idx..idx + len) as * const [ u8 ] as * const str ;
69+ return Ok ( & * v) ;
70+ }
71+
72+ // we compare the pointers since we care if they are 'at the same spot'
73+ // not if they are the same value
74+ }
75+ if ( quote_bits. wrapping_sub ( 1 ) & bs_bits) != 0 {
76+ // Move to the 'bad' character
77+ let bs_dist: u32 = bs_bits. trailing_zeros ( ) ;
78+ len += bs_dist as usize ;
79+ src_i += bs_dist as usize ;
80+ break ;
81+ } else {
82+ // they are the same. Since they can't co-occur, it means we encountered
83+ // neither.
84+ src_i += 32 ;
85+ len += 32 ;
86+ }
87+ }
88+
89+ let mut dst_i: usize = 0 ;
90+ let dst: & mut [ u8 ] = self . strings . as_mut_slice ( ) ;
91+
92+ loop {
93+ let ( v0, v1) = if src. len ( ) >= src_i + 32 {
94+ // This is safe since we ensure src is at least 16 wide
95+ #[ allow( clippy:: cast_ptr_alignment) ]
96+ unsafe {
97+ (
98+ vld1q_u8 ( src. get_unchecked ( src_i..src_i + 16 ) . as_ptr ( ) ) ,
99+ vld1q_u8 ( src. get_unchecked ( src_i + 16 ..src_i + 32 ) . as_ptr ( ) ) ,
100+ )
101+ }
102+ } else {
103+ unsafe {
104+ padding
105+ . get_unchecked_mut ( ..src. len ( ) - src_i)
106+ . clone_from_slice ( src. get_unchecked ( src_i..) ) ;
107+ // This is safe since we ensure src is at least 32 wide
108+ (
109+ vld1q_u8 ( padding. get_unchecked ( 0 ..16 ) . as_ptr ( ) ) ,
110+ vld1q_u8 ( padding. get_unchecked ( 16 ..32 ) . as_ptr ( ) ) ,
111+ )
112+ }
113+ } ;
114+
115+ unsafe {
116+ dst. get_unchecked_mut ( dst_i..dst_i + 32 ) . copy_from_slice ( src. get_unchecked ( src_i..src_i + 32 ) ) ;
117+ }
118+
119+ // store to dest unconditionally - we can overwrite the bits we don't like
120+ // later
121+ let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits ( v0, v1) ;
122+
123+ if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
124+ // we encountered quotes first. Move dst to point to quotes and exit
125+ // find out where the quote is...
126+ let quote_dist: u32 = quote_bits. trailing_zeros ( ) ;
127+
128+ ///////////////////////
129+ // Above, check for overflow in case someone has a crazy string (>=4GB?)
130+ // But only add the overflow check when the document itself exceeds 4GB
131+ // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
132+ ////////////////////////
133+
134+ // we advance the point, accounting for the fact that we have a NULl termination
135+
136+ dst_i += quote_dist as usize ;
137+ unsafe {
138+ self . input
139+ . get_unchecked_mut ( idx + len..idx + len + dst_i)
140+ . clone_from_slice ( & self . strings . get_unchecked ( ..dst_i) ) ;
141+ let v = self . input . get_unchecked ( idx..idx + len + dst_i) as * const [ u8 ]
142+ as * const str ;
143+ self . str_offset += dst_i as usize ;
144+ return Ok ( & * v) ;
145+ }
146+
147+ // we compare the pointers since we care if they are 'at the same spot'
148+ // not if they are the same value
149+ }
150+ if ( quote_bits. wrapping_sub ( 1 ) & bs_bits) != 0 {
151+ // find out where the backspace is
152+ let bs_dist: u32 = bs_bits. trailing_zeros ( ) ;
153+ let escape_char: u8 = unsafe { * src. get_unchecked ( src_i + bs_dist as usize + 1 ) } ;
154+ // we encountered backslash first. Handle backslash
155+ if escape_char == b'u' {
156+ // move src/dst up to the start; they will be further adjusted
157+ // within the unicode codepoint handling code.
158+ src_i += bs_dist as usize ;
159+ dst_i += bs_dist as usize ;
160+ let ( o, s) = if let Ok ( r) = handle_unicode_codepoint (
161+ unsafe { src. get_unchecked ( src_i..) } ,
162+ unsafe { dst. get_unchecked_mut ( dst_i..) }
163+ )
164+ {
165+ r
166+ } else {
167+ return Err ( self . error ( ErrorType :: InvlaidUnicodeCodepoint ) ) ;
168+ } ;
169+ if o == 0 {
170+ return Err ( self . error ( ErrorType :: InvlaidUnicodeCodepoint ) ) ;
171+ } ;
172+ // We moved o steps forword at the destiation and 6 on the source
173+ src_i += s;
174+ dst_i += o;
175+ } else {
176+ // simple 1:1 conversion. Will eat bs_dist+2 characters in input and
177+ // write bs_dist+1 characters to output
178+ // note this may reach beyond the part of the buffer we've actually
179+ // seen. I think this is ok
180+ let escape_result: u8 =
181+ unsafe { * ESCAPE_MAP . get_unchecked ( escape_char as usize ) } ;
182+ if escape_result == 0 {
183+ return Err ( self . error ( ErrorType :: InvalidEscape ) ) ;
184+ }
185+ unsafe {
186+ * dst. get_unchecked_mut ( dst_i + bs_dist as usize ) = escape_result;
187+ }
188+ src_i += bs_dist as usize + 2 ;
189+ dst_i += bs_dist as usize + 1 ;
190+ }
191+ } else {
192+ // they are the same. Since they can't co-occur, it means we encountered
193+ // neither.
194+ src_i += 32 ;
195+ dst_i += 32 ;
196+ }
197+ }
198+ }
199+ }
0 commit comments