@@ -176,34 +176,70 @@ where
176176 unsafe { & mut * ( self as * mut Self as * mut [ T ; N ] ) }
177177 }
178178
179+ /// Load a vector from an array of `T`.
180+ ///
181+ /// This function is necessary since `repr(simd)` has padding for non-power-of-2 vectors (at the time of writing).
182+ /// With padding, `read_unaligned` will read past the end of an array of N elements.
183+ ///
184+ /// # Safety
185+ /// Reading `ptr` must be safe, as if by `<*const [T; N]>::read_unaligned`.
186+ const unsafe fn load ( ptr : * const [ T ; N ] ) -> Self {
187+ // There are potentially simpler ways to write this function, but this should result in
188+ // LLVM `load <N x T>`
189+
190+ let mut tmp = core:: mem:: MaybeUninit :: < Self > :: uninit ( ) ;
191+ // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. It may have padding
192+ // which does not need to be initialized. The safety of reading `ptr` is ensured by the
193+ // caller.
194+ unsafe {
195+ core:: ptr:: copy_nonoverlapping ( ptr, tmp. as_mut_ptr ( ) . cast ( ) , 1 ) ;
196+ tmp. assume_init ( )
197+ }
198+ }
199+
200+ /// Store a vector to an array of `T`.
201+ ///
202+ /// See `load` as to why this function is necessary.
203+ ///
204+ /// # Safety
205+ /// Writing to `ptr` must be safe, as if by `<*mut [T; N]>::write_unaligned`.
206+ const unsafe fn store ( self , ptr : * mut [ T ; N ] ) {
207+ // There are potentially simpler ways to write this function, but this should result in
208+ // LLVM `store <N x T>`
209+
210+ // Creating a temporary helps LLVM turn the memcpy into a store.
211+ let tmp = self ;
212+ // SAFETY: `Simd<T, N>` always contains `N` elements of type `T`. The safety of writing
213+ // `ptr` is ensured by the caller.
214+ unsafe { core:: ptr:: copy_nonoverlapping ( tmp. as_array ( ) , ptr, 1 ) }
215+ }
216+
179217 /// Converts an array to a SIMD vector.
180218 pub const fn from_array ( array : [ T ; N ] ) -> Self {
181- // SAFETY: Transmuting between `Simd<T, N>` and `[T; N]`
182- // is always valid. We need to use `read_unaligned` here, since
183- // the array may have a lower alignment than the vector.
219+ // SAFETY: `&array` is safe to read.
184220 //
185- // FIXME: We currently use a pointer read instead of `transmute_copy` because
186- // it results in better codegen with optimizations disabled, but we should
187- // probably just use `transmute` once that works on const generic types.
221+ // FIXME: We currently use a pointer load instead of `transmute_copy` because `repr(simd)`
222+ // results in padding for non-power-of-2 vectors (so vectors are larger than arrays).
188223 //
189224 // NOTE: This deliberately doesn't just use `Self(array)`, see the comment
190225 // on the struct definition for details.
191- unsafe { ( & array as * const [ T ; N ] as * const Self ) . read_unaligned ( ) }
226+ unsafe { Self :: load ( & array) }
192227 }
193228
194229 /// Converts a SIMD vector to an array.
195230 pub const fn to_array ( self ) -> [ T ; N ] {
196- // SAFETY: Transmuting between `Simd<T, N>` and `[T; N]`
197- // is always valid. No need to use `read_unaligned` here, since
198- // the vector never has a lower alignment than the array.
231+ let mut tmp = core:: mem:: MaybeUninit :: uninit ( ) ;
232+ // SAFETY: writing to `tmp` is safe and initializes it.
199233 //
200- // FIXME: We currently use a pointer read instead of `transmute_copy` because
201- // it results in better codegen with optimizations disabled, but we should
202- // probably just use `transmute` once that works on const generic types.
234+ // FIXME: We currently use a pointer store instead of `transmute_copy` because `repr(simd)`
235+ // results in padding for non-power-of-2 vectors (so vectors are larger than arrays).
203236 //
204237 // NOTE: This deliberately doesn't just use `self.0`, see the comment
205238 // on the struct definition for details.
206- unsafe { ( & self as * const Self as * const [ T ; N ] ) . read ( ) }
239+ unsafe {
240+ self . store ( tmp. as_mut_ptr ( ) ) ;
241+ tmp. assume_init ( )
242+ }
207243 }
208244
209245 /// Converts a slice to a SIMD vector containing `slice[..N]`.
0 commit comments