@@ -5,15 +5,17 @@ mod tests;
55
66/// Finds all newlines, multi-byte characters, and non-narrow characters in a
77/// SourceFile.
8- ///
9- /// This function will use an SSE2 enhanced implementation if hardware support
10- /// is detected at runtime.
118pub ( crate ) fn analyze_source_file ( src : & str ) -> ( Vec < RelativeBytePos > , Vec < MultiByteChar > ) {
129 let mut lines = vec ! [ RelativeBytePos :: from_u32( 0 ) ] ;
1310 let mut multi_byte_chars = vec ! [ ] ;
1411
15- // Calls the right implementation, depending on hardware support available.
16- analyze_source_file_dispatch ( src, & mut lines, & mut multi_byte_chars) ;
12+ analyze_source_file_generic (
13+ src,
14+ src. len ( ) ,
15+ RelativeBytePos ( 0 ) ,
16+ & mut lines,
17+ & mut multi_byte_chars,
18+ ) ;
1719
1820 // The code above optimistically registers a new line *after* each \n
1921 // it encounters. If that point is already outside the source_file, remove
@@ -29,256 +31,6 @@ pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<Multi
2931 ( lines, multi_byte_chars)
3032}
3133
32- #[ cfg( bootstrap) ]
33- cfg_match ! {
34- cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) => {
35- fn analyze_source_file_dispatch(
36- src: & str ,
37- lines: & mut Vec <RelativeBytePos >,
38- multi_byte_chars: & mut Vec <MultiByteChar >,
39- ) {
40- if is_x86_feature_detected!( "sse2" ) {
41- unsafe {
42- analyze_source_file_sse2( src, lines, multi_byte_chars) ;
43- }
44- } else {
45- analyze_source_file_generic(
46- src,
47- src. len( ) ,
48- RelativeBytePos :: from_u32( 0 ) ,
49- lines,
50- multi_byte_chars,
51- ) ;
52- }
53- }
54-
55- /// Checks 16 byte chunks of text at a time. If the chunk contains
56- /// something other than printable ASCII characters and newlines, the
57- /// function falls back to the generic implementation. Otherwise it uses
58- /// SSE2 intrinsics to quickly find all newlines.
59- #[ target_feature( enable = "sse2" ) ]
60- unsafe fn analyze_source_file_sse2(
61- src: & str ,
62- lines: & mut Vec <RelativeBytePos >,
63- multi_byte_chars: & mut Vec <MultiByteChar >,
64- ) {
65- #[ cfg( target_arch = "x86" ) ]
66- use std:: arch:: x86:: * ;
67- #[ cfg( target_arch = "x86_64" ) ]
68- use std:: arch:: x86_64:: * ;
69-
70- const CHUNK_SIZE : usize = 16 ;
71-
72- let src_bytes = src. as_bytes( ) ;
73-
74- let chunk_count = src. len( ) / CHUNK_SIZE ;
75-
76- // This variable keeps track of where we should start decoding a
77- // chunk. If a multi-byte character spans across chunk boundaries,
78- // we need to skip that part in the next chunk because we already
79- // handled it.
80- let mut intra_chunk_offset = 0 ;
81-
82- for chunk_index in 0 ..chunk_count {
83- let ptr = src_bytes. as_ptr( ) as * const __m128i;
84- // We don't know if the pointer is aligned to 16 bytes, so we
85- // use `loadu`, which supports unaligned loading.
86- let chunk = unsafe { _mm_loadu_si128( ptr. add( chunk_index) ) } ;
87-
88- // For each character in the chunk, see if its byte value is < 0, which
89- // indicates that it's part of a UTF-8 char.
90- let multibyte_test = unsafe { _mm_cmplt_epi8( chunk, _mm_set1_epi8( 0 ) ) } ;
91- // Create a bit mask from the comparison results.
92- let multibyte_mask = unsafe { _mm_movemask_epi8( multibyte_test) } ;
93-
94- // If the bit mask is all zero, we only have ASCII chars here:
95- if multibyte_mask == 0 {
96- assert!( intra_chunk_offset == 0 ) ;
97-
98- // Check for newlines in the chunk
99- let newlines_test = unsafe { _mm_cmpeq_epi8( chunk, _mm_set1_epi8( b'\n' as i8 ) ) } ;
100- let mut newlines_mask = unsafe { _mm_movemask_epi8( newlines_test) } ;
101-
102- let output_offset = RelativeBytePos :: from_usize( chunk_index * CHUNK_SIZE + 1 ) ;
103-
104- while newlines_mask != 0 {
105- let index = newlines_mask. trailing_zeros( ) ;
106-
107- lines. push( RelativeBytePos ( index) + output_offset) ;
108-
109- // Clear the bit, so we can find the next one.
110- newlines_mask &= newlines_mask - 1 ;
111- }
112- } else {
113- // The slow path.
114- // There are multibyte chars in here, fallback to generic decoding.
115- let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
116- intra_chunk_offset = analyze_source_file_generic(
117- & src[ scan_start..] ,
118- CHUNK_SIZE - intra_chunk_offset,
119- RelativeBytePos :: from_usize( scan_start) ,
120- lines,
121- multi_byte_chars,
122- ) ;
123- }
124- }
125-
126- // There might still be a tail left to analyze
127- let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
128- if tail_start < src. len( ) {
129- analyze_source_file_generic(
130- & src[ tail_start..] ,
131- src. len( ) - tail_start,
132- RelativeBytePos :: from_usize( tail_start) ,
133- lines,
134- multi_byte_chars,
135- ) ;
136- }
137- }
138- }
139- _ => {
140- // The target (or compiler version) does not support SSE2 ...
141- fn analyze_source_file_dispatch(
142- src: & str ,
143- lines: & mut Vec <RelativeBytePos >,
144- multi_byte_chars: & mut Vec <MultiByteChar >,
145- ) {
146- analyze_source_file_generic(
147- src,
148- src. len( ) ,
149- RelativeBytePos :: from_u32( 0 ) ,
150- lines,
151- multi_byte_chars,
152- ) ;
153- }
154- }
155- }
156-
157- #[ cfg( not( bootstrap) ) ]
158- cfg_match ! {
159- any( target_arch = "x86" , target_arch = "x86_64" ) => {
160- fn analyze_source_file_dispatch(
161- src: & str ,
162- lines: & mut Vec <RelativeBytePos >,
163- multi_byte_chars: & mut Vec <MultiByteChar >,
164- ) {
165- if is_x86_feature_detected!( "sse2" ) {
166- unsafe {
167- analyze_source_file_sse2( src, lines, multi_byte_chars) ;
168- }
169- } else {
170- analyze_source_file_generic(
171- src,
172- src. len( ) ,
173- RelativeBytePos :: from_u32( 0 ) ,
174- lines,
175- multi_byte_chars,
176- ) ;
177- }
178- }
179-
180- /// Checks 16 byte chunks of text at a time. If the chunk contains
181- /// something other than printable ASCII characters and newlines, the
182- /// function falls back to the generic implementation. Otherwise it uses
183- /// SSE2 intrinsics to quickly find all newlines.
184- #[ target_feature( enable = "sse2" ) ]
185- unsafe fn analyze_source_file_sse2(
186- src: & str ,
187- lines: & mut Vec <RelativeBytePos >,
188- multi_byte_chars: & mut Vec <MultiByteChar >,
189- ) {
190- #[ cfg( target_arch = "x86" ) ]
191- use std:: arch:: x86:: * ;
192- #[ cfg( target_arch = "x86_64" ) ]
193- use std:: arch:: x86_64:: * ;
194-
195- const CHUNK_SIZE : usize = 16 ;
196-
197- let src_bytes = src. as_bytes( ) ;
198-
199- let chunk_count = src. len( ) / CHUNK_SIZE ;
200-
201- // This variable keeps track of where we should start decoding a
202- // chunk. If a multi-byte character spans across chunk boundaries,
203- // we need to skip that part in the next chunk because we already
204- // handled it.
205- let mut intra_chunk_offset = 0 ;
206-
207- for chunk_index in 0 ..chunk_count {
208- let ptr = src_bytes. as_ptr( ) as * const __m128i;
209- // We don't know if the pointer is aligned to 16 bytes, so we
210- // use `loadu`, which supports unaligned loading.
211- let chunk = unsafe { _mm_loadu_si128( ptr. add( chunk_index) ) } ;
212-
213- // For each character in the chunk, see if its byte value is < 0, which
214- // indicates that it's part of a UTF-8 char.
215- let multibyte_test = unsafe { _mm_cmplt_epi8( chunk, _mm_set1_epi8( 0 ) ) } ;
216- // Create a bit mask from the comparison results.
217- let multibyte_mask = unsafe { _mm_movemask_epi8( multibyte_test) } ;
218-
219- // If the bit mask is all zero, we only have ASCII chars here:
220- if multibyte_mask == 0 {
221- assert!( intra_chunk_offset == 0 ) ;
222-
223- // Check for newlines in the chunk
224- let newlines_test = unsafe { _mm_cmpeq_epi8( chunk, _mm_set1_epi8( b'\n' as i8 ) ) } ;
225- let mut newlines_mask = unsafe { _mm_movemask_epi8( newlines_test) } ;
226-
227- let output_offset = RelativeBytePos :: from_usize( chunk_index * CHUNK_SIZE + 1 ) ;
228-
229- while newlines_mask != 0 {
230- let index = newlines_mask. trailing_zeros( ) ;
231-
232- lines. push( RelativeBytePos ( index) + output_offset) ;
233-
234- // Clear the bit, so we can find the next one.
235- newlines_mask &= newlines_mask - 1 ;
236- }
237- } else {
238- // The slow path.
239- // There are multibyte chars in here, fallback to generic decoding.
240- let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
241- intra_chunk_offset = analyze_source_file_generic(
242- & src[ scan_start..] ,
243- CHUNK_SIZE - intra_chunk_offset,
244- RelativeBytePos :: from_usize( scan_start) ,
245- lines,
246- multi_byte_chars,
247- ) ;
248- }
249- }
250-
251- // There might still be a tail left to analyze
252- let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
253- if tail_start < src. len( ) {
254- analyze_source_file_generic(
255- & src[ tail_start..] ,
256- src. len( ) - tail_start,
257- RelativeBytePos :: from_usize( tail_start) ,
258- lines,
259- multi_byte_chars,
260- ) ;
261- }
262- }
263- }
264- _ => {
265- // The target (or compiler version) does not support SSE2 ...
266- fn analyze_source_file_dispatch(
267- src: & str ,
268- lines: & mut Vec <RelativeBytePos >,
269- multi_byte_chars: & mut Vec <MultiByteChar >,
270- ) {
271- analyze_source_file_generic(
272- src,
273- src. len( ) ,
274- RelativeBytePos :: from_u32( 0 ) ,
275- lines,
276- multi_byte_chars,
277- ) ;
278- }
279- }
280- }
281-
28234// `scan_len` determines the number of bytes in `src` to scan. Note that the
28335// function can read past `scan_len` if a multi-byte character starts within the
28436// range but extends past it. The overflow is returned by the function.
0 commit comments