@@ -573,6 +573,52 @@ namespace xsimd
             return vld1q_f32(src);
         }
 
+        /* batch bool version */
+        template <class T, class A, detail::enable_sized_t<T, 1> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
+        {
+            auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, A {});
+            return { 0 - vmem.data };
+        }
+        template <class T, class A, detail::enable_sized_t<T, 1> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
+        {
+            return load_unaligned(mem, t, r);
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 2> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
+        {
+            uint16x8_t vmem = vmovl_u8(vld1_u8((unsigned char const*)mem));
+            return { 0 - vmem };
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 2> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
+        {
+            return load_unaligned(mem, t, r);
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 4> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
+        {
+            uint8x8_t tmp = vreinterpret_u8_u32(vset_lane_u32(*(unsigned int*)mem, vdup_n_u32(0), 0));
+            return { 0 - vmovl_u16(vget_low_u16(vmovl_u8(tmp))) };
+        }
+
+        template <class T, class A, detail::enable_sized_t<T, 4> = 0>
+        XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
+        {
+            return load_unaligned(mem, t, r);
+        }
+
+        template <class A>
+        XSIMD_INLINE batch_bool<float, A> load_aligned(bool const* mem, batch_bool<float, A> t, requires_arch<neon> r) noexcept
+        {
+            uint8x8_t tmp = vreinterpret_u8_u32(vset_lane_u32(*(unsigned int*)mem, vdup_n_u32(0), 0));
+            return { 0 - vmovl_u16(vget_low_u16(vmovl_u8(tmp))) };
+        }
+
         /*********
          * store *
          *********/
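
All of the added overloads follow the same idea: each `bool` is read back as a 0/1 byte, widened to the lane width of `T`, and negated, since `0 - 1` in an N-bit unsigned lane yields the all-ones pattern that `batch_bool` uses as a per-lane mask. The snippet below is a minimal standalone sketch (not part of the patch) that mirrors the `sizeof(T) == 4` path above; it assumes, as the patch does, that `bool` occupies a single 0/1 byte, assumes the usual little-endian lane layout on ARM, and reads the four bytes with `memcpy` rather than the patch's direct pointer cast, purely to keep the example self-contained.

    // Standalone sketch (not part of the patch): mirrors the sizeof(T) == 4
    // overload above. Assumes bool is stored as a single 0/1 byte and a
    // little-endian lane layout, which is the usual case on ARM.
    #include <arm_neon.h>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        bool flags[4] = { true, false, false, true };

        // Read the four 0/1 bytes as one 32-bit word and place it in lane 0.
        unsigned int word;
        std::memcpy(&word, flags, sizeof word);
        uint8x8_t bytes = vreinterpret_u8_u32(vset_lane_u32(word, vdup_n_u32(0), 0));

        // Widen each byte to 32 bits (u8 -> u16 -> u32), then compute 0 - x:
        // a 1 becomes 0xFFFFFFFF, a 0 stays 0, i.e. a full per-lane mask.
        uint32x4_t widened = vmovl_u16(vget_low_u16(vmovl_u8(bytes)));
        uint32x4_t mask = vsubq_u32(vdupq_n_u32(0), widened);

        uint32_t out[4];
        vst1q_u32(out, mask);
        for (int i = 0; i < 4; ++i)
            std::printf("%08x ", out[i]); // prints: ffffffff 00000000 00000000 ffffffff
        std::printf("\n");
        return 0;
    }

The sketch spells out the subtraction with `vsubq_u32`; the patch writes `0 - vmem` directly on the register type, which amounts to the same lane-wise operation.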