@@ -41,44 +41,44 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }

-/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
-/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
-/// chunk size if a load happened.
-#[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
-unsafe fn load_chunk_aligned<T: Copy>(
-    src: *const usize,
-    dst: *mut usize,
-    load_sz: usize,
-    offset: usize,
-) -> usize {
-    let chunk_sz = core::mem::size_of::<T>();
-    if (load_sz & chunk_sz) != 0 {
-        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
-        offset | chunk_sz
-    } else {
-        offset
+unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+    let dest_end = dest.wrapping_add(n);
+    while dest < dest_end {
+        *dest = *src;
+        dest = dest.wrapping_add(1);
+        src = src.wrapping_add(1);
     }
 }

-/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
-/// read with the out-of-bounds part filled with 0s.
-/// `load_sz` be strictly less than `WORD_SIZE`.
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned.
+/// `load_sz` must be strictly less than `WORD_SIZE`. The remaining bytes are filled non-deterministically.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_sz < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
+    debug_assert!(src.addr() % WORD_SIZE == 0);
+
+    let mut out: usize;
+    core::cfg_match! {
+        // We don't need an x86 path here as `feature = "mem-unaligned"` is always set there.
+        all(not(miri), any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec")) => {
+            unsafe {
+                core::arch::asm!(
+                    "ldr {out}, [{src}]",
+                    src = in(reg) src,
+                    out = lateout(reg) out,
+                    options(nostack, readonly, preserves_flags),
+                );
+            }
+        }
+        _ => {
+            out = 0;
+            copy_forward_bytes(&raw mut out as *mut u8, src as *mut u8, load_sz);
+        }
+
+    }

-    let mut i = 0;
-    let mut out = 0usize;
-    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
-    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
-    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
-    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
-    debug_assert!(i == load_sz);
     out
 }

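Aside, not part of the patch: a minimal sketch of what the fallback arm above computes. It assumes a 64-bit little-endian target, and the name `load_aligned_partial_fallback` is illustrative rather than taken from the crate. On the asm path the bytes beyond `load_sz` may be arbitrary, so callers can only rely on the low `load_sz` bytes; the zero-fill below mirrors the fallback arm only.

```rust
// Illustrative sketch only; assumes a 64-bit little-endian target.
const WORD_SIZE: usize = core::mem::size_of::<usize>();

/// Hypothetical stand-in that mirrors the portable fallback arm of
/// `load_aligned_partial`: copy the first `load_sz` bytes of the aligned word
/// at `src` into a zero-initialized `usize`. The asm path loads the whole word
/// instead, so only the low `load_sz` bytes of the result are meaningful.
unsafe fn load_aligned_partial_fallback(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    debug_assert!(src as usize % WORD_SIZE == 0);
    let mut out: usize = 0;
    unsafe {
        core::ptr::copy_nonoverlapping(src as *const u8, &mut out as *mut usize as *mut u8, load_sz);
    }
    out
}

fn main() {
    let word: usize = 0x8877_6655_4433_2211;
    let lo3 = unsafe { load_aligned_partial_fallback(&word, 3) };
    // Little-endian: the low 3 bytes of `word` survive; the rest are zero here.
    assert_eq!(lo3, 0x33_2211);
}
```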
@@ -90,35 +90,36 @@ unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
 #[inline(always)]
 unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_sz < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
+    debug_assert!(src.addr() % WORD_SIZE == 0);
+
+    let mut out: usize;
+    core::cfg_match! {
+        // We don't need an x86 path here as `feature = "mem-unaligned"` is always set there.
+        all(not(miri), any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec")) => {
+            unsafe {
+                core::arch::asm!(
+                    "ldr {out}, [{src}]",
+                    src = in(reg) src,
+                    out = lateout(reg) out,
+                    options(nostack, readonly, preserves_flags),
+                );
+            }
+        }
+        _ => {
+            out = 0;
+            // Obtain pointers pointing to the beginning of the range we want to load.
+            let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+            let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+            copy_forward_bytes(out_shifted as *mut u8, src_shifted as *mut u8, load_sz);
+        }
+
+    }

-    let mut i = 0;
-    let mut out = 0usize;
-    // Obtain pointers pointing to the beginning of the range we want to load.
-    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
-    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
-    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
-    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
-    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
-    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
-    debug_assert!(i == load_sz);
     out
 }

 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
-    #[inline(always)]
-    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
-        let dest_end = dest.wrapping_add(n);
-        while dest < dest_end {
-            *dest = *src;
-            dest = dest.wrapping_add(1);
-            src = src.wrapping_add(1);
-        }
-    }
-
     #[inline(always)]
     unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
         let mut dest_usize = dest as *mut usize;
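A note on why these partial loads exist, again not part of the patch: the misaligned copy loops rebuild each destination word by shifting and merging two aligned source loads, and the partial variants keep the loads at the very start and end of the buffer in bounds. The sketch below illustrates the shift-and-merge step under a 64-bit little-endian assumption; `read_word_via_aligned_loads` is a hypothetical helper, not the crate's `copy_forward_misaligned_words`.

```rust
// Illustration only; assumes a 64-bit little-endian target.
const WORD_SIZE: usize = core::mem::size_of::<usize>();

/// Hypothetical helper: read the `usize` starting at the unaligned address `p`
/// by merging the two aligned words that overlap it. In the actual copy loops,
/// the loads at the edges of the buffer would be `load_aligned_partial` /
/// `load_aligned_end_partial` so no out-of-range bytes are read on the
/// fallback path.
unsafe fn read_word_via_aligned_loads(p: *const u8) -> usize {
    let offset = p as usize % WORD_SIZE;
    debug_assert!(offset != 0, "the aligned case needs no merging");
    let lo_word = p.wrapping_sub(offset) as *const usize;
    let hi_word = lo_word.wrapping_add(1);
    let shift = 8 * offset;
    // Little-endian merge: high bytes of the first word, low bytes of the second.
    unsafe { (*lo_word >> shift) | (*hi_word << (8 * WORD_SIZE - shift)) }
}

fn main() {
    // Two aligned words holding the bytes 0..=15 in little-endian order.
    let words: [usize; 2] = [
        usize::from_le_bytes([0, 1, 2, 3, 4, 5, 6, 7]),
        usize::from_le_bytes([8, 9, 10, 11, 12, 13, 14, 15]),
    ];
    let p = (words.as_ptr() as *const u8).wrapping_add(3); // 3 bytes past an aligned boundary
    let expected = usize::from_le_bytes([3, 4, 5, 6, 7, 8, 9, 10]);
    assert_eq!(unsafe { read_word_via_aligned_loads(p) }, expected);
}
```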