Skip to content

Commit eb17eaa

Browse files
Extend #1172 approach to arm - store version
1 parent 62a008c commit eb17eaa

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,43 @@ namespace xsimd
724724
store_complex_aligned(dst, src, A {});
725725
}
726726

727+
/*********************
728+
* store<batch_bool> *
729+
*********************/
730+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
731+
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
732+
{
733+
uint8x16_t val = vshrq_n_u8(b.data, 7);
734+
vst1q_u8((uint8_t*)mem, val);
735+
}
736+
737+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
738+
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
739+
{
740+
uint8x8_t val = vshr_n_u8(vqmovn_u16(b.data), 7);
741+
vst1_u8((uint8_t*)mem, val);
742+
}
743+
744+
template <class T, class A, detail::enable_sized_t<T, 4> = 0>
745+
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
746+
{
747+
uint8x8_t val = vshr_n_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(b.data), vdup_n_u16(0))), 7);
748+
vst1_lane_u32((uint32_t*)mem, vreinterpret_u32_u8(val), 0);
749+
}
750+
751+
template <class T, class A, detail::enable_sized_t<T, 8> = 0>
752+
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
753+
{
754+
uint8x8_t val = vshr_n_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(vcombine_u32(vqmovn_u64(b.data), vdup_n_u32(0))), vdup_n_u16(0))), 7);
755+
vst1_lane_u16((uint16_t*)mem, vreinterpret_u16_u8(val), 0);
756+
}
757+
758+
template <class A>
759+
XSIMD_INLINE void store(batch_bool<float, A> b, bool* mem, requires_arch<neon>) noexcept
760+
{
761+
store(batch_bool<uint32_t, A>(b.data), mem, A {});
762+
}
763+
727764
/*******
728765
* neg *
729766
*******/

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,16 @@ namespace xsimd
177177
return store_aligned<A>(dst, src, A {});
178178
}
179179

180+
/*********************
181+
* store<batch_bool> *
182+
*********************/
183+
184+
template <class A>
185+
XSIMD_INLINE void store(batch_bool<double, A> b, bool* mem, requires_arch<neon>) noexcept
186+
{
187+
store(batch_bool<uint64_t, A>(b.data), mem, A {});
188+
}
189+
180190
/****************
181191
* load_complex *
182192
****************/

0 commit comments

Comments
 (0)