@@ -81,8 +81,8 @@ cfg_select! {
8181 // use `loadu`, which supports unaligned loading.
8282 let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
8383
84- // For each character in the chunk, see if its byte value is < 0,
85- // which indicates that it's part of a UTF-8 char.
84+ // For each character in the chunk, see if its byte value is < 0, which
85+ // indicates that it's part of a UTF-8 char.
8686 let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
8787 // Create a bit mask from the comparison results.
8888 let multibyte_mask = _mm_movemask_epi8(multibyte_test);
@@ -132,111 +132,8 @@ cfg_select! {
132132 }
133133 }
134134 }
135- target_arch = "loongarch64" => {
136- fn analyze_source_file_dispatch(
137- src: &str,
138- lines: &mut Vec<RelativeBytePos>,
139- multi_byte_chars: &mut Vec<MultiByteChar>,
140- ) {
141- use std::arch::is_loongarch_feature_detected;
142-
143- if is_loongarch_feature_detected!("lsx") {
144- unsafe {
145- analyze_source_file_lsx(src, lines, multi_byte_chars);
146- }
147- } else {
148- analyze_source_file_generic(
149- src,
150- src.len(),
151- RelativeBytePos::from_u32(0),
152- lines,
153- multi_byte_chars,
154- );
155- }
156- }
157-
158- /// Checks 16 byte chunks of text at a time. If the chunk contains
159- /// something other than printable ASCII characters and newlines, the
160- /// function falls back to the generic implementation. Otherwise it uses
161- /// LSX intrinsics to quickly find all newlines.
162- #[target_feature(enable = "lsx")]
163- unsafe fn analyze_source_file_lsx(
164- src: &str,
165- lines: &mut Vec<RelativeBytePos>,
166- multi_byte_chars: &mut Vec<MultiByteChar>,
167- ) {
168- use std::arch::loongarch64::*;
169-
170- const CHUNK_SIZE: usize = 16;
171-
172- let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
173-
174- // This variable keeps track of where we should start decoding a
175- // chunk. If a multi-byte character spans across chunk boundaries,
176- // we need to skip that part in the next chunk because we already
177- // handled it.
178- let mut intra_chunk_offset = 0;
179-
180- for (chunk_index, chunk) in chunks.iter().enumerate() {
181- // All LSX memory instructions support unaligned access, so using
182- // vld is fine.
183- let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) };
184-
185- // For each character in the chunk, see if its byte value is < 0,
186- // which indicates that it's part of a UTF-8 char.
187- let multibyte_mask = lsx_vmskltz_b(chunk);
188- // Create a bit mask from the comparison results.
189- let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask);
190-
191- // If the bit mask is all zero, we only have ASCII chars here:
192- if multibyte_mask == 0 {
193- assert!(intra_chunk_offset == 0);
194-
195- // Check for newlines in the chunk
196- let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk);
197- let newlines_mask = lsx_vmskltz_b(newlines_test);
198- let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask);
199-
200- let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
201-
202- while newlines_mask != 0 {
203- let index = newlines_mask.trailing_zeros();
204-
205- lines.push(RelativeBytePos(index) + output_offset);
206-
207- // Clear the bit, so we can find the next one.
208- newlines_mask &= newlines_mask - 1;
209- }
210- } else {
211- // The slow path.
212- // There are multibyte chars in here, fallback to generic decoding.
213- let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
214- intra_chunk_offset = analyze_source_file_generic(
215- &src[scan_start..],
216- CHUNK_SIZE - intra_chunk_offset,
217- RelativeBytePos::from_usize(scan_start),
218- lines,
219- multi_byte_chars,
220- );
221- }
222- }
223-
224- // There might still be a tail left to analyze
225- let tail_start = src.len() - tail.len() + intra_chunk_offset;
226- if tail_start < src.len() {
227- analyze_source_file_generic(
228- &src[tail_start..],
229- src.len() - tail_start,
230- RelativeBytePos::from_usize(tail_start),
231- lines,
232- multi_byte_chars,
233- );
234- }
235- }
236- }
237135 _ => {
238- // The target (or compiler version) does not support vector instructions
239- // our specialized implementations need (x86 SSE2, loongarch64 LSX)...
136+ // The target (or compiler version) does not support SSE2 ...
240137 fn analyze_source_file_dispatch(
241138 src: &str,
242139 lines: &mut Vec<RelativeBytePos>,
0 commit comments