@@ -234,25 +234,27 @@ const unsafe fn resolve_error_location(st: u32, bytes: &[u8], i: usize) -> Utf8E
234234}
235235
236236// The simpler but slower algorithm to run the DFA with error handling.
237+ // On success, returns the final DFA state after executing on the whole slice.
237238//
238239// # Safety
239240// The caller must ensure `bytes[..i]` is a valid UTF-8 prefix and `st` is the DFA state after
240241// executing on `bytes[..i]`.
242+ #[ inline]
241243const unsafe fn run_with_error_handling (
242- st : & mut u32 ,
244+ mut st : u32 ,
243245 bytes : & [ u8 ] ,
244246 mut i : usize ,
245- ) -> Result < ( ) , Utf8Error > {
247+ ) -> Result < u32 , Utf8Error > {
246248 while i < bytes. len ( ) {
247- let new_st = next_state ( * st, bytes[ i] ) ;
249+ let new_st = next_state ( st, bytes[ i] ) ;
248250 if new_st & STATE_MASK == ST_ERROR {
249251 // SAFETY: Guaranteed by the caller.
250- return Err ( unsafe { resolve_error_location ( * st, bytes, i) } ) ;
252+ return Err ( unsafe { resolve_error_location ( st, bytes, i) } ) ;
251253 }
252- * st = new_st;
254+ st = new_st;
253255 i += 1 ;
254256 }
255- Ok ( ( ) )
257+ Ok ( st )
256258}
257259
258260/// Walks through `v` checking that it's a valid UTF-8 sequence,
@@ -265,19 +267,15 @@ pub(super) const fn run_utf8_validation(bytes: &[u8]) -> Result<(), Utf8Error> {
265267
266268#[ inline]
267269const fn run_utf8_validation_const ( bytes : & [ u8 ] ) -> Result < ( ) , Utf8Error > {
268- let mut st = ST_ACCEPT ;
269270 // SAFETY: We start at index 0 (the empty prefix), for which ACCEPT is the valid state.
270- match unsafe { run_with_error_handling ( & mut st , bytes, 0 ) } {
271+ match unsafe { run_with_error_handling ( ST_ACCEPT , bytes, 0 ) } {
271272 Err ( err) => Err ( err) ,
272- Ok ( ( ) ) => {
273- if st & STATE_MASK == ST_ACCEPT {
274- Ok ( ( ) )
275- } else {
276- // SAFETY: `st` is the last state after execution without encountering any error.
277- let mut err = unsafe { resolve_error_location ( st, bytes, bytes. len ( ) ) } ;
278- err. error_len = Utf8ErrorLen :: Eof ;
279- Err ( err)
280- }
273+ Ok ( st) if st & STATE_MASK == ST_ACCEPT => Ok ( ( ) ) ,
274+ Ok ( st) => {
275+ // SAFETY: `st` is the last state after execution without encountering any error.
276+ let mut err = unsafe { resolve_error_location ( st, bytes, bytes. len ( ) ) } ;
277+ err. error_len = Utf8ErrorLen :: Eof ;
278+ Err ( err)
281279 }
282280 }
283281}
@@ -288,10 +286,9 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
288286 const ASCII_CHUNK_SIZE : usize = 16 ;
289287 const { assert ! ( ASCII_CHUNK_SIZE % MAIN_CHUNK_SIZE == 0 ) } ;
290288
291- let mut st = ST_ACCEPT ;
292289 let mut i = bytes. len ( ) % MAIN_CHUNK_SIZE ;
293290 // SAFETY: Start at initial state ACCEPT.
294- unsafe { run_with_error_handling ( & mut st , & bytes[ ..i] , 0 ) ? } ;
291+ let mut st = unsafe { run_with_error_handling ( ST_ACCEPT , & bytes[ ..i] , 0 ) ? } ;
295292
296293 while i + MAIN_CHUNK_SIZE <= bytes. len ( ) {
297294 // Fast path: if the current state is ACCEPT, we can skip to the next non-ASCII chunk.
@@ -326,7 +323,8 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
326323 }
327324 if new_st & STATE_MASK == ST_ERROR {
328325 // SAFETY: `st` is the last state after executing `bytes[..i]` without encountering any error.
329- return unsafe { run_with_error_handling ( & mut st, bytes, i) } ;
326+ // Moreover, the next chunk is known to fail validation, so this call must return `Err`.
327+ return Err ( unsafe { run_with_error_handling ( st, bytes, i) . unwrap_err_unchecked ( ) } ) ;
330328 }
331329
332330 st = new_st;
0 commit comments