Skip to content

Commit f63bd07

Browse files
Add support for xsimd::widen on ARM
Related to #1179
1 parent d3e6e13 commit f63bd07

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "../types/xsimd_neon_register.hpp"
2222
#include "../types/xsimd_utils.hpp"
23+
#include "./common/xsimd_common_cast.hpp"
2324

2425
// Wrap intrinsics so we can pass them as function pointers
2526
// - OP: intrinsics name prefix, e.g., vorrq
@@ -3211,6 +3212,41 @@ namespace xsimd
32113212
{
32123213
return vreinterpretq_f32_u32(swizzle(batch<uint32_t, A>(vreinterpretq_u32_f32(self)), mask, A {}));
32133214
}
3215+
3216+
/*********
3217+
* widen *
3218+
*********/
3219+
template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
3220+
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
3221+
{
3222+
return { batch<widen_t<T>, A>(vaddl_s8(vget_low_s8(x), vdup_n_s8(0))), batch<widen_t<T>, A>(vaddl_s8(vget_high_s8(x), vdup_n_s8(0))) };
3223+
}
3224+
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
3225+
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
3226+
{
3227+
return { batch<widen_t<T>, A>(vaddl_u8(vget_low_u8(x), vdup_n_u8(0))), batch<widen_t<T>, A>(vaddl_u8(vget_high_u8(x), vdup_n_u8(0))) };
3228+
}
3229+
template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
3230+
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
3231+
{
3232+
return { batch<widen_t<T>, A>(vaddl_s16(vget_low_s16(x), vdup_n_s16(0))), batch<widen_t<T>, A>(vaddl_s16(vget_high_s16(x), vdup_n_s16(0))) };
3233+
}
3234+
template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
3235+
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
3236+
{
3237+
return { batch<widen_t<T>, A>(vaddl_u16(vget_low_u16(x), vdup_n_u16(0))), batch<widen_t<T>, A>(vaddl_u16(vget_high_u16(x), vdup_n_u16(0))) };
3238+
}
3239+
template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
3240+
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
3241+
{
3242+
return { batch<widen_t<T>, A>(vaddl_s32(vget_low_s32(x), vdup_n_s32(0))), batch<widen_t<T>, A>(vaddl_s32(vget_high_s32(x), vdup_n_s32(0))) };
3243+
}
3244+
template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
3245+
XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
3246+
{
3247+
return { batch<widen_t<T>, A>(vaddl_u32(vget_low_u32(x), vdup_n_u32(0))), batch<widen_t<T>, A>(vaddl_u32(vget_high_u32(x), vdup_n_u32(0))) };
3248+
}
3249+
32143250
}
32153251

32163252
}

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,6 +1547,15 @@ namespace xsimd
15471547
{
15481548
return batch<std::complex<double>>(swizzle(self.real(), idx, A()), swizzle(self.imag(), idx, A()));
15491549
}
1550+
1551+
/*********
1552+
* widen *
1553+
*********/
1554+
template <class A, class T>
1555+
XSIMD_INLINE std::array<batch<double, A>, 2> widen(batch<float, A> const& x, requires_arch<neon64>) noexcept
1556+
{
1557+
return { batch<double, A>(vcvt_f64_f32(vget_low_f32(x))), batch<double, A>(vcvt_high_f64_f32(x)) };
1558+
}
15501559
}
15511560
}
15521561

0 commit comments

Comments
 (0)